testFW: GTEST
testCaseFolder:
- ./compute/test/cker
- - ./runtime/onert/core/src/backend/cpu_common
+ - ./runtime/onert/core/src/backend/basic
- ./runtime/onert/frontend/nnapi
- ./runtime/onert/test/core/compiler
- ./runtime/onert/test/core/exec
- functionName:
starts:
- TEST
-
+
negativeTestCase:
- condition:
- testName:
----
Language: Cpp
BasedOnStyle: Google
AccessModifierOffset: -2
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
- AfterClass: true
- AfterControlStatement: true
- AfterEnum: true
- AfterFunction: true
- AfterNamespace: true
- AfterObjCDeclaration: false
- AfterStruct: true
- AfterUnion: false
- BeforeCatch: true
- BeforeElse: true
- IndentBraces: false
+ AfterClass: true
+ AfterControlStatement: true
+ AfterEnum: true
+ AfterFunction: true
+ AfterNamespace: true
+ AfterObjCDeclaration: false
+ AfterStruct: true
+ AfterUnion: false
+ AfterExternBlock: false
+ BeforeCatch: true
+ BeforeElse: true
+ IndentBraces: false
BreakBeforeBraces: Custom
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
ColumnLimit: 100
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: false
-ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
+ConstructorInitializerIndentWidth: 2
+ContinuationIndentWidth: 2
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
IncludeCategories:
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
Priority: 2
PointerAlignment: Right
ReflowComments: true
SortIncludes: false
+SortUsingDeclarations: false
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
-TabWidth: 4
+TabWidth: 2
UseTab: Never
+++ /dev/null
-Language: Cpp
-BasedOnStyle: Google
-AccessModifierOffset: -2
-AlignAfterOpenBracket: Align
-AlignEscapedNewlinesLeft: true
-AlignConsecutiveAssignments: false
-AlignConsecutiveDeclarations: false
-AlignOperands: true
-AlignTrailingComments: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: false
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortFunctionsOnASingleLine: All
-AllowShortIfStatementsOnASingleLine: false
-AllowShortLoopsOnASingleLine: false
-AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakAfterReturnType: None
-AlwaysBreakBeforeMultilineStrings: false
-AlwaysBreakTemplateDeclarations: false
-BinPackArguments: true
-BinPackParameters: true
-BraceWrapping:
- AfterClass: true
- AfterControlStatement: true
- AfterEnum: true
- AfterFunction: true
- AfterNamespace: true
- AfterObjCDeclaration: false
- AfterStruct: true
- AfterUnion: false
- AfterExternBlock: false
- BeforeCatch: true
- BeforeElse: true
- IndentBraces: false
-BreakBeforeBraces: Custom
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: false
-BreakAfterJavaFieldAnnotations: false
-BreakStringLiterals: true
-ColumnLimit: 100
-CommentPragmas: '^ IWYU pragma:'
-ConstructorInitializerAllOnOneLineOrOnePerLine: false
-ConstructorInitializerIndentWidth: 2
-ContinuationIndentWidth: 2
-Cpp11BracedListStyle: true
-DerivePointerAlignment: false
-DisableFormat: false
-ExperimentalAutoDetectBinPacking: false
-FixNamespaceComments: false
-IncludeCategories:
- - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
- Priority: 2
- - Regex: '^(<|"(gtest|isl|json)/)'
- Priority: 3
- - Regex: '.*'
- Priority: 1
-IndentCaseLabels: true
-IndentWidth: 2
-IndentWrappedFunctionNames: false
-JavaScriptQuotes: Leave
-JavaScriptWrapImports: true
-KeepEmptyLinesAtTheStartOfBlocks: true
-MacroBlockBegin: ''
-MacroBlockEnd: ''
-MaxEmptyLinesToKeep: 1
-NamespaceIndentation: None
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: true
-PenaltyBreakBeforeFirstCallParameter: 19
-PenaltyBreakComment: 300
-PenaltyBreakFirstLessLess: 120
-PenaltyBreakString: 1000
-PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 60
-PointerAlignment: Right
-ReflowComments: true
-SortIncludes: false
-SortUsingDeclarations: false
-SpaceAfterCStyleCast: false
-SpaceBeforeAssignmentOperators: true
-SpaceBeforeParens: ControlStatements
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 1
-SpacesInAngles: false
-SpacesInContainerLiterals: true
-SpacesInCStyleCastParentheses: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-Standard: Cpp11
-TabWidth: 2
-UseTab: Never
mkdir -p $(OVERLAY_FOLDER)/lib
cp $(EXT_ACL_FOLDER)/* $(OVERLAY_FOLDER)/lib
# Make stamp file
- printf "20.05" > $(OVERLAY_FOLDER)/ARMCOMPUTE.stamp
+ printf "21.02" > $(OVERLAY_FOLDER)/ARMCOMPUTE.stamp
endif
NNFW_WORKSPACE="$(WORKSPACE)" NNFW_INSTALL_PREFIX=$(INSTALL_PATH) ./nnfw configure \
- Please post questions, issues, or suggestions into [Issues](https://github.com/Samsung/ONE/issues). This is the best way to communicate with the developer.
- You can also have an open discussion with community members through [gitter.im](https://gitter.im/Samsung/ONE) channel.
-
-## Hall of Fame
-
-[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/0)[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/1)[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/2)[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/3)[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/4)[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/5)[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/6)[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/7)
-
- ./arser
- ./circle2circle
- ./circle-quantizer
+ - ./crew
- ./cwrap
- ./foder
- ./hermes
- ./logo-core
- ./luci
- ./luci-interpreter
+ - ./luci-eval-driver
+ - ./luci-pass-value-test
- ./luci-value-test
- ./mio-circle
- ./mio-tflite
- ./record-minmax
- ./safemain
- ./souschef
- - ./stdex
- ./tflite2circle
testFile:
{
public:
explicit Overlay(const Shape &shape, const Layout &layout, T *base)
- : View<T>{shape, layout}, _base{base}
+ : View<T>{shape, layout}, _base{base}
{
// DO NOTHING
}
{
public:
Shape(uint32_t depth, uint32_t height, uint32_t width)
- : _depth{depth}, _height{height}, _width{width}
+ : _depth{depth}, _height{height}, _width{width}
{
// DO NOTHING
}
{
public:
explicit Overlay(const Shape &shape, const Layout &layout, InputIt it)
- : _impl{shape, layout}, _it{it}
+ : _impl{shape, layout}, _it{it}
{
// DO NOTHING
}
{
public:
Shape(uint32_t count, uint32_t depth, uint32_t height, uint32_t width)
- : _count{count}, _depth{depth}, _height{height}, _width{width}
+ : _count{count}, _depth{depth}, _height{height}, _width{width}
{
// DO NOTHING
}
{
public:
explicit Overlay(const Shape &shape, const Layout &layout, T *base)
- : View<T>{shape, layout}, _base{base}
+ : View<T>{shape, layout}, _base{base}
{
// DO NOTHING
}
{
public:
explicit View(const Shape &shape, const Layout &layout)
- : _shape{shape}, _layout{std::move(layout)}
+ : _shape{shape}, _layout{std::move(layout)}
{
// DO NOTHING
}
const Shape shape{4, 6, 3};
int data[4 * 6 * 3] = {
- 0,
+ 0,
};
auto overlay = make_overlay<int, CHWLayout>(shape, data);
const Shape shape{4, 6, 3};
int data[4 * 6 * 3] = {
- 0,
+ 0,
};
const auto overlay = make_overlay<int, CHWLayout>(shape, data);
const Shape shape{4, 6, 3};
int data[4 * 6 * 3] = {
- 0,
+ 0,
};
auto overlay = make_overlay<int, CHWLayout>(shape, data);
const Shape shape{2, 4, 6, 3};
int data[2 * 4 * 6 * 3] = {
- 0,
+ 0,
};
auto overlay = make_overlay<int, NCHWLayout>(shape, data);
const Shape shape{2, 4, 6, 3};
int data[2 * 4 * 6 * 3] = {
- 0,
+ 0,
};
const auto overlay = make_overlay<int, NCHWLayout>(shape, data);
const Shape shape{2, 4, 6, 3};
int data[2 * 4 * 6 * 3] = {
- 0,
+ 0,
};
auto overlay = make_overlay<int, NCHWLayout>(shape, data);
const Shape shape{2, 3};
int data[2 * 3] = {
- 0,
+ 0,
};
auto view = make_overlay<int, LexicalLayout>(shape, data);
const Shape shape{2, 3};
int data[2 * 3] = {
- 0,
+ 0,
};
const auto view = make_overlay<int, LexicalLayout>(shape, data);
const Shape shape{2, 3};
int data[2 * 3] = {
- 0,
+ 0,
};
auto view = make_overlay<int, LexicalLayout>(shape, data);
# It means that a developer who wants to link arser just needs to add one line.
# target_link_libraries(another-users-target arser)
target_include_directories(arser INTERFACE include/)
+target_link_libraries(arser INTERFACE nncc_coverage)
if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)
nnas_find_package(GTest REQUIRED)
-set(TESTS "${CMAKE_CURRENT_SOURCE_DIR}/tests/arser.test.cpp")
+set(TESTS "${CMAKE_CURRENT_SOURCE_DIR}/tests/arser.test.cpp"
+ "${CMAKE_CURRENT_SOURCE_DIR}/tests/HelpMessage.test.cpp")
GTest_AddTest(arser_test ${TESTS})
-target_include_directories(arser_test PRIVATE include)
+target_link_libraries(arser_test arser)
* limitations under the License.
*/
+#ifndef __ARSER_H__
+#define __ARSER_H__
+
#include <iostream>
#include <sstream>
#include <cstring>
-namespace
+#include <cassert>
+
+namespace arser
+{
+namespace internal
{
template <typename T> T lexical_cast(const std::string &str)
return data;
}
-template <> bool lexical_cast(const std::string &str)
+template <> inline bool lexical_cast(const std::string &str)
{
bool data = true;
if (str == "false" || str == "False" || str == "FALSE" || str == "0")
template <> inline std::string to_string(const bool value) { return value ? "true" : "false"; }
-} // namespace
+/**
+ * @brief Returns the string with leading dashes removed.
+ *
+ * If there is no leading dash, the string is returned as is.
+ */
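+// e.g. remove_dash("--input") returns "input"; an all-dash string such as "--" is returned as is.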
+inline std::string remove_dash(const std::string &str)
+{
+ std::string ret{str};
+ auto pos = ret.find_first_not_of('-');
+ if (pos == std::string::npos)
+ return ret;
+ return ret.substr(pos);
+}
+
+/**
+ * @brief Returns the string created by concatenating the elements of a vector, separated by commas.
+ */
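+// e.g. make_comma_concatenated({"-h", "--help"}) returns "-h, --help".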
+inline std::string make_comma_concatenated(const std::vector<std::string> &vec)
+{
+ std::ostringstream oss;
+ std::copy(vec.begin(), std::prev(vec.end()), std::ostream_iterator<std::string>(oss, ", "));
+ oss << vec.back();
+ return oss.str();
+}
+
+} // namespace internal
+} // namespace arser
namespace arser
{
class Arser;
+/**
+ * Argument
+ *   ├── positional argument
+ *   └── optional argument   [ dash at the beginning of the string ]
+ *       ├── long option     [ two or more dashes ]
+ *       └── short option    [ one dash ]
+ *
+ * Argument has two types - positional argument, optional argument.
+ *
+ * The way to distinguish the two types is whether there is a dash('-') at the beginning of the
+ * string.
+ *
+ * And, an optional argument has two types as well - long option and short option - which are
+ * distinguished by the number of dashes.
+ */
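+/*
+ * A minimal usage sketch (illustrative only; argument names below are examples):
+ *
+ *   arser::Arser arser;
+ *   arser.add_argument("input");            // positional argument (no leading dash)
+ *   arser.add_argument("-v", "--verbose");  // optional argument with short and long option
+ *   arser.parse(argc, argv);
+ */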
class Argument
{
public:
- explicit Argument(const std::string &arg_name) : _name{arg_name} {}
+ explicit Argument(const std::string &arg_name) : _long_name{arg_name}, _names{arg_name} {}
+ explicit Argument(const std::string &short_name, const std::string &long_name)
+ : _short_name{short_name}, _long_name{long_name}, _names{short_name, long_name}
+ {
+ }
+ explicit Argument(const std::string &short_name, const std::string &long_name,
+ const std::vector<std::string> &names)
+ : _short_name{short_name}, _long_name{long_name}, _names{names}
+ {
+ // 'names' must have 'short_name' and 'long_name'.
+ auto it = std::find(names.begin(), names.end(), short_name);
+ assert(it != names.end());
+ it = std::find(names.begin(), names.end(), long_name);
+ assert(it != names.end());
+    // to avoid an unused-variable warning.
+ (void)it;
+ }
Argument &nargs(uint32_t num)
{
{
if ((_nargs <= 1 && TypeName<T>::Get() == _type) ||
(_nargs > 1 && TypeName<std::vector<T>>::Get() == _type))
- _values.emplace_back(::to_string(value));
+ _values.emplace_back(internal::to_string(value));
else
{
throw std::runtime_error("Type mismatch. "
if ((_nargs <= 1 && TypeName<T>::Get() == _type) ||
(_nargs > 1 && TypeName<std::vector<T>>::Get() == _type))
{
- _values.emplace_back(::to_string(value));
+ _values.emplace_back(internal::to_string(value));
default_value(values...);
}
else
}
private:
- std::string _name;
+ // The '_names' vector contains all of the options specified by the user.
+ // And among them, '_long_name' and '_short_name' are selected.
+ std::string _long_name;
+ std::string _short_name;
+ std::vector<std::string> _names;
std::string _type;
std::string _help_message;
std::function<void(void)> _func;
{
public:
explicit Arser(const std::string &program_description = {})
- : _program_description{program_description}
+ : _program_description{program_description}
{
- add_argument("--help").help("Show help message and exit").nargs(0);
+ add_argument("-h", "--help").help("Show help message and exit").nargs(0);
}
Argument &add_argument(const std::string &arg_name)
{
- if (arg_name.at(0) != '-')
+ if (arg_name.at(0) != '-') /* positional */
{
_positional_arg_vec.emplace_back(arg_name);
_arg_map[arg_name] = &_positional_arg_vec.back();
}
- else
+ else /* optional */
{
+      // The length of an optional argument name must be 2 or more,
+      // and it must contain at least one non-dash character (e.g. '-' or '--' alone are invalid).
+ if (arg_name.size() < 2)
+ {
+ throw std::runtime_error("Too short name. The length of argument name must be 2 or more.");
+ }
+ if (arg_name == "--")
+ {
+ throw std::runtime_error(
+ "Too short name. Option name must contain at least one character other than dash.");
+ }
_optional_arg_vec.emplace_back(arg_name);
+ _optional_arg_vec.back()._short_name = arg_name;
_arg_map[arg_name] = &_optional_arg_vec.back();
}
return *_arg_map[arg_name];
}
+ Argument &add_argument(const std::vector<std::string> &arg_name_vec)
+ {
+ assert(arg_name_vec.size() >= 2);
+ std::string long_opt, short_opt;
+ // find long and short option
+ for (const auto &arg_name : arg_name_vec)
+ {
+ if (arg_name.at(0) != '-')
+ {
+ throw std::runtime_error("Invalid argument. "
+ "Positional argument cannot have short option.");
+ }
+ assert(arg_name.size() >= 2);
+ if (long_opt.empty() && arg_name.at(0) == '-' && arg_name.at(1) == '-')
+ {
+ long_opt = arg_name;
+ }
+ if (short_opt.empty() && arg_name.at(0) == '-' && arg_name.at(1) != '-')
+ {
+ short_opt = arg_name;
+ }
+ }
+ // If one of the two is empty, fill it with the non-empty one for pretty printing.
+ if (long_opt.empty())
+ {
+ assert(not short_opt.empty());
+ long_opt = short_opt;
+ }
+ if (short_opt.empty())
+ {
+ assert(not long_opt.empty());
+ short_opt = long_opt;
+ }
+
+ _optional_arg_vec.emplace_back(short_opt, long_opt, arg_name_vec);
+ for (const auto &arg_name : arg_name_vec)
+ {
+ _arg_map[arg_name] = &_optional_arg_vec.back();
+ }
+ return _optional_arg_vec.back();
+ }
+
+ template <typename... Ts> Argument &add_argument(const std::string &arg_name, Ts... arg_names)
+ {
+ if (sizeof...(arg_names) == 0)
+ {
+ return add_argument(arg_name);
+ }
+ // sizeof...(arg_names) > 0
+ else
+ {
+ return add_argument(std::vector<std::string>{arg_name, arg_names...});
+ }
+ }
+
+ void validate_arguments(void)
+ {
+    // Positional arguments are implicitly required; calling required() on them is not allowed.
+ for (const auto &arg : _positional_arg_vec)
+ {
+ if (arg._is_required)
+ {
+ throw std::runtime_error("Invalid arguments. Positional argument must always be required.");
+ }
+ }
+ }
+
void parse(int argc, char **argv)
{
+ validate_arguments();
_program_name = argv[0];
_program_name.erase(0, _program_name.find_last_of("/\\") + 1);
if (argc >= 2)
{
- if (!std::strcmp(argv[1], "--help"))
+ if (!std::strcmp(argv[1], "--help") || !std::strcmp(argv[1], "-h"))
{
std::cout << *this;
std::exit(0);
for (const auto &arg : _arg_map)
{
const auto &func = arg.second->_func;
- if (func && !std::strcmp(argv[1], arg.second->_name.c_str()))
+ if (func && !std::strcmp(argv[1], arg.first.c_str()))
{
func();
std::exit(0);
template <typename T> T get(const std::string &arg_name);
+ friend std::ostream &operator<<(std::ostream &stream, const Arser &parser)
+ {
+ // print description
+ if (!parser._program_description.empty())
+ {
+ stream << "What " << parser._program_name << " does: " << parser._program_description
+ << "\n\n";
+ }
+ /*
+ ** print usage
+ */
+ stream << "Usage: ./" << parser._program_name << " ";
+ // required optional argument
+ for (const auto &arg : parser._optional_arg_vec)
+ {
+ if (!arg._is_required)
+ continue;
+ stream << arg._short_name << " ";
+ std::string arg_name = arser::internal::remove_dash(arg._long_name);
+ std::for_each(arg_name.begin(), arg_name.end(),
+ [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
+ stream << " ";
+ }
+ // rest of the optional argument
+ for (const auto &arg : parser._optional_arg_vec)
+ {
+ if (arg._is_required)
+ continue;
+ stream << "[" << arg._short_name;
+ if (arg._nargs)
+ {
+ stream << " ";
+ std::string arg_name = arser::internal::remove_dash(arg._long_name);
+ std::for_each(arg_name.begin(), arg_name.end(),
+ [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
+ }
+ stream << "]"
+ << " ";
+ }
+    // positional arguments
+ for (const auto &arg : parser._positional_arg_vec)
+ {
+ stream << arg._long_name << " ";
+ }
+ stream << "\n\n";
+ /*
+ ** print argument list and its help message
+ */
+ // get the length of the longest argument
+ size_t length_of_longest_arg = 0;
+ for (const auto &arg : parser._positional_arg_vec)
+ {
+ length_of_longest_arg = std::max(length_of_longest_arg,
+ arser::internal::make_comma_concatenated(arg._names).size());
+ }
+ for (const auto &arg : parser._optional_arg_vec)
+ {
+ length_of_longest_arg = std::max(length_of_longest_arg,
+ arser::internal::make_comma_concatenated(arg._names).size());
+ }
+
+ const size_t message_width = 60;
+ // positional argument
+ if (!parser._positional_arg_vec.empty())
+ {
+ stream << "[Positional argument]" << std::endl;
+ for (const auto &arg : parser._positional_arg_vec)
+ {
+ stream.width(length_of_longest_arg);
+ stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t";
+ for (size_t i = 0; i < arg._help_message.length(); i += message_width)
+ {
+ if (i)
+ stream << std::string(length_of_longest_arg, ' ') << "\t";
+ stream << arg._help_message.substr(i, message_width) << std::endl;
+ }
+ }
+ std::cout << std::endl;
+ }
+ // optional argument
+ if (!parser._optional_arg_vec.empty())
+ {
+ stream << "[Optional argument]" << std::endl;
+ for (const auto &arg : parser._optional_arg_vec)
+ {
+ stream.width(length_of_longest_arg);
+ stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t";
+ for (size_t i = 0; i < arg._help_message.length(); i += message_width)
+ {
+ if (i)
+ stream << std::string(length_of_longest_arg, ' ') << "\t";
+ stream << arg._help_message.substr(i, message_width) << std::endl;
+ }
+ }
+ }
+
+ return stream;
+ }
+
private:
std::string _program_name;
std::string _program_description;
std::list<Argument> _positional_arg_vec;
std::list<Argument> _optional_arg_vec;
std::map<std::string, Argument *> _arg_map;
-
- friend std::ostream &operator<<(std::ostream &, const Arser &);
};
template <typename T> T Arser::get_impl(const std::string &arg_name, T *)
auto arg = _arg_map.find(arg_name);
if (arg == _arg_map.end())
throw std::runtime_error("Invalid argument. "
- "There is no argument you are looking for.");
+ "There is no argument you are looking for: " +
+ arg_name);
if (arg->second->_type != TypeName<T>::Get())
throw std::runtime_error("Type mismatch. "
"You must make sure that the argument is given before accessing it. "
"You can do it by calling arser[\"argument\"].");
- return ::lexical_cast<T>(arg->second->_values[0]);
+ return internal::lexical_cast<T>(arg->second->_values[0]);
}
template <typename T> std::vector<T> Arser::get_impl(const std::string &arg_name, std::vector<T> *)
auto arg = _arg_map.find(arg_name);
if (arg == _arg_map.end())
throw std::runtime_error("Invalid argument. "
- "There is no argument you are looking for.");
+ "There is no argument you are looking for: " +
+ arg_name);
if (arg->second->_type != TypeName<std::vector<T>>::Get())
throw std::runtime_error("Type mismatch. "
std::vector<T> data;
std::transform(arg->second->_values.begin(), arg->second->_values.end(), std::back_inserter(data),
- [](std::string str) -> T { return ::lexical_cast<T>(str); });
+ [](std::string str) -> T { return internal::lexical_cast<T>(str); });
return data;
}
return get_impl(arg_name, static_cast<T *>(nullptr));
}
-std::ostream &operator<<(std::ostream &stream, const Arser &parser)
-{
- // print description
- if (!parser._program_description.empty())
- {
- stream << "What " << parser._program_name << " does: " << parser._program_description << "\n\n";
- }
- /*
- ** print usage
- */
- stream << "Usage: ./" << parser._program_name << " ";
- // required optional argument
- for (const auto &arg : parser._optional_arg_vec)
- {
- if (!arg._is_required)
- continue;
- stream << arg._name << " ";
- std::string arg_name = arg._name.substr(2);
- std::for_each(arg_name.begin(), arg_name.end(),
- [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
- stream << " ";
- }
- // rest of the optional argument
- for (const auto &arg : parser._optional_arg_vec)
- {
- if (arg._is_required)
- continue;
- stream << "[" << arg._name;
- if (arg._nargs)
- {
- stream << " ";
- std::string arg_name = arg._name.substr(2);
- std::for_each(arg_name.begin(), arg_name.end(),
- [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
- }
- stream << "]"
- << " ";
- }
- // positional arguement
- for (const auto &arg : parser._positional_arg_vec)
- {
- stream << arg._name << " ";
- }
- stream << "\n\n";
- /*
- ** print argument list and its help message
- */
- // get the length of the longest argument
- size_t length_of_longest_arg = 0;
- for (const auto &arg : parser._positional_arg_vec)
- {
- length_of_longest_arg = std::max(length_of_longest_arg, arg._name.length());
- }
- for (const auto &arg : parser._optional_arg_vec)
- {
- length_of_longest_arg = std::max(length_of_longest_arg, arg._name.length());
- }
-
- const size_t message_width = 60;
- // positional argument
- if (!parser._positional_arg_vec.empty())
- {
- stream << "[Positional argument]" << std::endl;
- for (const auto &arg : parser._positional_arg_vec)
- {
- stream.width(length_of_longest_arg);
- stream << std::left << arg._name << "\t";
- for (size_t i = 0; i < arg._help_message.length(); i += message_width)
- {
- if (i)
- stream << std::string(length_of_longest_arg, ' ') << "\t";
- stream << arg._help_message.substr(i, message_width) << std::endl;
- }
- }
- std::cout << std::endl;
- }
- // optional argument
- if (!parser._optional_arg_vec.empty())
- {
- stream << "[Optional argument]" << std::endl;
- for (const auto &arg : parser._optional_arg_vec)
- {
- stream.width(length_of_longest_arg);
- stream << std::left << arg._name << "\t";
- for (size_t i = 0; i < arg._help_message.length(); i += message_width)
- {
- if (i)
- stream << std::string(length_of_longest_arg, ' ') << "\t";
- stream << arg._help_message.substr(i, message_width) << std::endl;
- }
- }
- }
-
- return stream;
-}
-
} // namespace arser
+
+#endif // __ARSER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "arser/arser.h"
+
+#include "Prompt.h"
+
+using namespace arser;
+
+/**
+ * [WARNING] DO NOT GIVE ARSER THE '-h' OR '--help' OPTION IN THE TESTS BELOW.
+ *
+ * arser exits with code 0 when the '-h' option is given, which makes googletest report a pass.
+ */
+
+TEST(HelpMessageTest, Default)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--dummy").nargs(0).help("Dummy optional argument");
+
+ std::ostringstream oss;
+ std::string expected_out = "Usage: ./arser [-h] [--dummy] \n"
+ "\n"
+ "[Optional argument]\n"
+ "-h, --help Show help message and exit\n"
+ "--dummy \tDummy optional argument\n";
+
+ test::Prompt prompt("./arser --dummy");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ oss << arser;
+
+ /* assert */
+ EXPECT_EQ(expected_out, oss.str());
+}
+
+TEST(HelpMessageTest, ShortOption)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("-v", "--verbose").nargs(0).help("Provides additional details");
+
+ std::ostringstream oss;
+ std::string expected_out = "Usage: ./arser [-h] [-v] \n"
+ "\n"
+ "[Optional argument]\n"
+ "-h, --help \tShow help message and exit\n"
+ "-v, --verbose\tProvides additional details\n";
+
+ test::Prompt prompt("./arser -v");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ oss << arser;
+
+ /* assert */
+ EXPECT_EQ(expected_out, oss.str());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ARSER_PROMPT_H__
+#define __ARSER_PROMPT_H__
+
+#include <iterator>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace arser
+{
+namespace test
+{
+
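+// Prompt splits a command-line string into argc/argv so tests can feed it to Arser::parse(),
+// e.g. test::Prompt prompt("./driver --verbose"); arser.parse(prompt.argc(), prompt.argv());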
+class Prompt
+{
+public:
+ Prompt(const std::string &command)
+ {
+ std::istringstream iss(command);
+ std::vector<std::string> token(std::istream_iterator<std::string>{iss},
+ std::istream_iterator<std::string>());
+ _arg = std::move(token);
+ _argv.reserve(_arg.size());
+ for (const auto &t : _arg)
+ {
+ _argv.push_back(const_cast<char *>(t.data()));
+ }
+ }
+ int argc(void) const { return _argv.size(); }
+ char **argv(void) { return _argv.data(); }
+
+private:
+ std::vector<char *> _argv;
+ std::vector<std::string> _arg;
+};
+
+} // namespace test
+} // namespace arser
+
+#endif // __ARSER_PROMPT_H__
Arser arser;
arser.add_argument("--verbose")
- .nargs(0)
- .help("It provides additional details as to what the executable is doing");
+ .nargs(0)
+ .help("It provides additional details as to what the executable is doing");
Prompt prompt("./executable --verbose");
/* act */
Arser arser;
arser.add_argument("--volume")
- .nargs(1)
- .type(arser::DataType::INT32)
- .help("Set a volume as you provided.");
+ .nargs(1)
+ .type(arser::DataType::INT32)
+ .help("Set a volume as you provided.");
arser.add_argument("--frequency")
- .nargs(1)
- .type(arser::DataType::FLOAT)
- .help("Set a frequency as you provided.");
+ .nargs(1)
+ .type(arser::DataType::FLOAT)
+ .help("Set a frequency as you provided.");
Prompt prompt("./radio --volume 5 --frequency 128.5");
/* act */
Arser arser;
arser.add_argument("--weight")
- .nargs(1)
- .type(arser::DataType::INT32)
- .help("Set a volume as you provided.");
+ .nargs(1)
+ .type(arser::DataType::INT32)
+ .help("Set a volume as you provided.");
Prompt prompt("./radio"); // empty argument
/* act */
Arser arser;
arser.add_argument("--volume")
- .nargs(1)
- .type(arser::DataType::INT32)
- .required()
- .help("Set a volume as you provided.");
+ .nargs(1)
+ .type(arser::DataType::INT32)
+ .required()
+ .help("Set a volume as you provided.");
Prompt prompt("./radio");
/* act */ /* assert */
Arser arser;
arser.add_argument("--input_path")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("input path of this program.")
- .required();
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.")
+ .required();
arser.add_argument("--output_path")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("output path of this program.")
- .required(true);
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
arser.add_argument("--training_data")
- .nargs(5)
- .type(arser::DataType::INT32_VEC)
- .help("give traning data to this program.")
- .required();
+ .nargs(5)
+ .type(arser::DataType::INT32_VEC)
+    .help("give training data to this program.")
+ .required();
Prompt prompt("./ml --input_path /I/am/in.put --output_path I/am/out.put "
"--training_data 2 43 234 3 334");
Arser arser;
arser.add_argument("--add_float")
- .nargs(2)
- .type(arser::DataType::FLOAT_VEC)
- .help("Add two float numbers.");
+ .nargs(2)
+ .type(arser::DataType::FLOAT_VEC)
+ .help("Add two float numbers.");
Prompt prompt("./calculator --add_float 3.2 5.4");
/* act */
Arser arser;
arser.add_argument("--three_color")
- .nargs(3)
- .type(arser::DataType::STR_VEC)
- .help("insert your three favorite color");
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .help("insert your three favorite color");
Prompt prompt("./color_factory --three_color red blue yellow");
/* act */
Arser arser;
arser.add_argument("--version")
- .help("Show version and exit")
- .exit_with(std::bind(printVersion, "1.2.0"));
+ .help("Show version and exit")
+ .exit_with(std::bind(printVersion, "1.2.0"));
Prompt prompt("./arser --version");
/* act */ /* assert */
Arser arser;
arser.add_argument("--delivery")
- .nargs(3)
- .type(arser::DataType::STR_VEC)
- .default_value("pizza", "chicken", "hamburger")
- .help("Enter three foods that you want to deliver");
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .default_value("pizza", "chicken", "hamburger")
+ .help("Enter three foods that you want to deliver");
arser.add_argument("--assistant")
- .type(arser::DataType::STR)
- .default_value("Bixby")
- .help("Enter name of your assistant");
+ .type(arser::DataType::STR)
+ .default_value("Bixby")
+ .help("Enter name of your assistant");
arser.add_argument("--sound")
- .type(arser::DataType::BOOL)
- .nargs(1)
- .default_value(true)
- .help("Sound on/off");
+ .type(arser::DataType::BOOL)
+ .nargs(1)
+ .default_value(true)
+ .help("Sound on/off");
arser.add_argument("--number")
- .type(arser::DataType::INT32_VEC)
- .nargs(4)
- .default_value(1, 2, 3, 4)
- .help("Enter the number that you want to call");
+ .type(arser::DataType::INT32_VEC)
+ .nargs(4)
+ .default_value(1, 2, 3, 4)
+ .help("Enter the number that you want to call");
arser.add_argument("--time")
- .type(arser::DataType::INT32_VEC)
- .nargs(3)
- .default_value(0, 0, 0)
- .help("Current time(H/M/S)");
+ .type(arser::DataType::INT32_VEC)
+ .nargs(3)
+ .default_value(0, 0, 0)
+ .help("Current time(H/M/S)");
arser.add_argument("--name")
- .type(arser::DataType::STR)
- .nargs(1)
- .default_value("no name")
- .help("Enter your name");
+ .type(arser::DataType::STR)
+ .nargs(1)
+ .default_value("no name")
+ .help("Enter your name");
Prompt prompt("/phone --time 1 52 34 --name arser");
/* act */
// 1 string, 1 argument
EXPECT_EQ("arser", arser.get<std::string>("--name"));
}
+
+TEST(BasicTest, shortOption)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--input_path", "-i")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.")
+ .required();
+ arser.add_argument("--output_path", "-o")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
+
+ Prompt prompt("./driver -i /I/am/in.put --output_path I/am/out.put");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--input_path"]);
+ EXPECT_EQ("/I/am/in.put", arser.get<std::string>("--input_path"));
+ EXPECT_TRUE(arser["--output_path"]);
+ EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path"));
+}
+
+TEST(BasicTest, shortMultipleOption)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--input_path", "-i", "--input", "--in")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.")
+ .required();
+ arser.add_argument("--output_path", "-o")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
+
+ Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--input"]);
+ EXPECT_EQ("/I/am/in.put", arser.get<std::string>("--input"));
+ EXPECT_TRUE(arser["--output_path"]);
+ EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path"));
+}
+
+TEST(BasicTest, OptWithRequiredDuplicate)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--input_path", "-i", "--input", "--in")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.")
+ .required();
+ arser.add_argument("--output_path", "-o")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
+
+ Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate");
+ /* act */ /* assert */
+ EXPECT_THROW(arser.parse(prompt.argc(), prompt.argv()), std::runtime_error);
+}
+
+TEST(BasicTest, OptWithNonRequiredDuplicate)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--input_path", "-i", "--input", "--in")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.");
+ /* .required() */
+ arser.add_argument("--output_path", "-o")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
+
+ Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--input"]);
+ EXPECT_EQ("/I/am/duplicate", arser.get<std::string>("--input"));
+ EXPECT_TRUE(arser["--output_path"]);
+ EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path"));
+}
public:
template <typename T>
auto operator()(const std::pair<T, T> &p) const
- -> decltype(std::make_pair(std::declval<Callable>()(p.first),
- std::declval<Callable>()(p.second)))
+ -> decltype(std::make_pair(std::declval<Callable>()(p.first),
+ std::declval<Callable>()(p.second)))
{
return std::make_pair(f(p.first), f(p.second));
}
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(caffegen ${SOURCES})
-target_link_libraries(caffegen stdex)
target_link_libraries(caffegen cli)
# NOTE "Caffe" package provides both caffe and caffeproto target
# NOTE "caffeproto" is linked to "caffe"
#include "MergeCommand.h"
#include <cli/App.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <map>
#include <string>
-using stdex::make_unique;
+using std::make_unique;
int main(int argc, char **argv)
{
int entry(int argc, char **argv)
{
arser::Arser arser{
- "circle-inspect allows users to retrieve various information from a Circle model file"};
+ "circle-inspect allows users to retrieve various information from a Circle model file"};
arser.add_argument("--operators").nargs(0).help("Dump operators in circle file");
arser.add_argument("--conv2d_weight")
- .nargs(0)
- .help("Dump Conv2D series weight operators in circle file");
+ .nargs(0)
+ .help("Dump Conv2D series weight operators in circle file");
arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in circle file");
arser.add_argument("circle").type(arser::DataType::STR).help("Circle file to inspect");
--- /dev/null
+set(SRCS_PART_TESTER
+ src/Driver.cpp
+ src/PModelsRunner.cpp
+ )
+
+add_executable(circle_part_driver ${SRCS_PART_TESTER})
+target_link_libraries(circle_part_driver foder)
+target_link_libraries(circle_part_driver loco)
+target_link_libraries(circle_part_driver luci_import)
+target_link_libraries(circle_part_driver luci_lang)
+target_link_libraries(circle_part_driver luci_log)
+target_link_libraries(circle_part_driver luci_interpreter)
+target_link_libraries(circle_part_driver crew)
+target_link_libraries(circle_part_driver safemain)
+target_link_libraries(circle_part_driver nncc_common)
+
+install(TARGETS circle_part_driver DESTINATION bin)
--- /dev/null
+# circle-part-driver
+
+_circle-part-driver_ is a test driver to run partitioned circle models
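+
+Invocation follows the usage printed by the driver itself:
+
+```
+circle_part_driver <path/to/partition/config> <num_inputs> <path/to/input/prefix> <path/to/output/file>
+```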
--- /dev/null
+require("foder")
+require("loco")
+require("luci")
+require("luci-interpreter")
+require("crew")
+require("safemain")
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PModelsRunner.h"
+
+#include <luci/Log.h>
+
+#include <iostream>
+
+int entry(int argc, char **argv)
+{
+ LOGGER(l);
+
+ if (argc != 5)
+ {
+ std::cerr
+ << "Usage: " << argv[0]
+ << " <path/to/partition/config> <num_inputs> <path/to/input/prefix> <path/to/output/file>\n";
+ return EXIT_FAILURE;
+ }
+  // NOTE: input/output data file names follow the format filename.ext0, filename.ext1, ...
+  // NOTE: output shapes are stored as filename.ext0.shape, filename.ext1.shape, ...
+  //       each holding one line of text in CSV format (like H,W or N,C,H,W)
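+  //       e.g. with input_prefix "in", inputs are read from in0, in1, ...; with output_file "out",
+  //       outputs are written to out0, out1, ... along with out0.shape, out1.shape, ...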
+
+ const char *config_filename = argv[1];
+ const int32_t num_inputs = atoi(argv[2]);
+ const char *input_prefix = argv[3];
+ const char *output_file = argv[4];
+
+ prunner::PModelsRunner pmrunner;
+
+ INFO(l) << "Read config file: " << config_filename << std::endl;
+ if (not pmrunner.load_config(config_filename))
+ return EXIT_FAILURE;
+
+ INFO(l) << "Read input file: " << input_prefix << ", #inputs: " << num_inputs << std::endl;
+ pmrunner.load_inputs(input_prefix, num_inputs);
+
+ INFO(l) << "Run all partitioned models..." << std::endl;
+ if (!pmrunner.run())
+ return EXIT_FAILURE;
+
+ INFO(l) << "Save output file: " << output_file << std::endl;
+ pmrunner.save_outputs(output_file);
+
+ return EXIT_SUCCESS;
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PModelsRunner.h"
+
+#include <luci/IR/Nodes/CircleInput.h>
+#include <luci/IR/Nodes/CircleOutput.h>
+#include <luci/Importer.h>
+#include <luci/Log.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include <foder/FileLoader.h>
+#include <crew/PConfig.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+#include <string>
+#include <stdexcept>
+
+namespace
+{
+
+void write_file(const std::string &filename, const char *data, size_t data_size)
+{
+ std::ofstream fs(filename, std::ofstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.write(data, data_size).fail())
+ {
+ throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+ }
+}
+
+std::unique_ptr<luci::Module> import_circle(const std::string &filename)
+{
+ std::ifstream fs(filename, std::ifstream::binary);
+ if (fs.fail())
+ {
+ throw std::runtime_error("Cannot open model file \"" + filename + "\".\n");
+ }
+ std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
+ std::istreambuf_iterator<char>());
+
+ return luci::Importer().importModule(circle::GetModel(model_data.data()));
+}
+
+void save_shape(const std::string &shape_filename, const luci::CircleOutput *output_node)
+{
+ if (output_node->rank() == 0)
+ {
+ write_file(shape_filename, "1", 1);
+ }
+ else
+ {
+ auto shape_str = std::to_string(output_node->dim(0).value());
+ for (uint32_t j = 1; j < output_node->rank(); j++)
+ {
+ shape_str += ",";
+ shape_str += std::to_string(output_node->dim(j).value());
+ }
+ write_file(shape_filename, shape_str.c_str(), shape_str.size());
+ }
+}
+
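+// Computes the tensor byte size from dtype and shape; e.g. a float32 tensor of shape [1,2,3]
+// occupies 4 * 1 * 2 * 3 = 24 bytes (assuming loco::size() returns the dtype size in bytes).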
+template <typename NodeT> size_t tensor_size(const NodeT *node)
+{
+ uint32_t tsize = loco::size(node->dtype());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ {
+ assert(node->dim(i).known());
+ tsize *= node->dim(i).value();
+ }
+ return tsize;
+}
+
+} // namespace
+
+namespace prunner
+{
+
+bool PModelsRunner::load_config(const std::string &filename)
+{
+ if (!crew::read_ini(filename, _pconfig))
+ {
+ std::cerr << "ERROR: Invalid config ini file: '" << filename << "'" << std::endl;
+ return false;
+ }
+
+ for (auto &part : _pconfig.parts)
+ {
+ _models_to_run.push_back(part.model_file);
+ }
+ return true;
+}
+
+void PModelsRunner::load_inputs(const std::string &input_prefix, int32_t num_inputs)
+{
+ LOGGER(l);
+
+ auto its = _pconfig.source.inputs.begin();
+ for (int32_t i = 0; i < num_inputs; ++i, ++its)
+ {
+ std::string filename = input_prefix + std::to_string(i);
+
+ INFO(l) << "Load input data: " << filename << std::endl;
+ foder::FileLoader file_loader{filename};
+
+ std::string input_name = *its;
+ _data_stage[input_name] = file_loader.load();
+
+ INFO(l) << "Input: [" << input_name << "], size " << _data_stage[input_name].size()
+ << std::endl;
+ }
+}
+
+/**
+ * @brief Return true if all inputs of the model are ready in _data_stage
+ */
+bool PModelsRunner::is_input_ready(const RunModel &model)
+{
+ for (auto &part : _pconfig.parts)
+ {
+ if (part.model_file != model)
+ continue;
+
+ for (auto &input : part.inputs)
+ {
+ auto it = _data_stage.find(input);
+ if (it == _data_stage.end())
+ return false;
+ }
+ }
+ return true;
+}
+
+bool PModelsRunner::run(void)
+{
+ LOGGER(l);
+
+ // for each partitioned model, if the inputs of the model are ready, run the model
+ do
+ {
+ bool found_model = false;
+
+ for (auto it = _models_to_run.begin(); it != _models_to_run.end(); ++it)
+ {
+ auto model_fname = *it;
+
+ INFO(l) << "Check model input ready: " << model_fname << std::endl;
+ if (is_input_ready(model_fname))
+ {
+ found_model = true;
+
+ INFO(l) << "Run model: " << model_fname << std::endl;
+ auto module = import_circle(model_fname);
+
+ luci_interpreter::Interpreter interpreter(module.get());
+
+ // Set input
+ // TODO support multiple subgraphs
+ assert(module->size() == 1);
+ const auto input_nodes = loco::input_nodes(module->graph());
+ int32_t num_inputs = static_cast<int32_t>(input_nodes.size());
+ for (int32_t i = 0; i < num_inputs; i++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
+
+ auto input_name = input_node->name();
+ assert(_data_stage.find(input_name) != _data_stage.end());
+
+ auto input_data = _data_stage[input_name];
+
+ interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+ }
+
+ // Run interpreter
+ interpreter.interpret();
+ INFO(l) << "Run model: " << model_fname << " done" << std::endl;
+
+ // Get output.
+ const auto output_nodes = loco::output_nodes(module->graph());
+ for (uint32_t i = 0; i < module->graph()->outputs()->size(); i++)
+ {
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ auto output_name = output_node->name();
+
+ Buffer output_data(tensor_size(output_node));
+
+ interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+
+          // There should not be duplicate output names
+ // TODO check with multiple virtual outputs
+ assert(_data_stage.find(output_name) == _data_stage.end());
+ _data_stage[output_name] = output_data;
+ }
+
+        // We've run this model; remove it from the model list
+ _models_to_run.erase(it);
+ break;
+ }
+ }
+
+ if (not found_model)
+ {
+ std::cerr << "ERROR: model partition or configuration has problems" << std::endl;
+ return false;
+ }
+ } while (not _models_to_run.empty());
+
+ return true;
+}
+
+void PModelsRunner::save_outputs(const std::string &output_file)
+{
+ // load source model as we need to get both shape and node name
+ // TODO check for unknown shape
+ auto source_fname = _pconfig.source.model_file;
+
+ auto module = import_circle(source_fname);
+
+ const auto output_nodes = loco::output_nodes(module->graph());
+ for (uint32_t i = 0; i < module->graph()->outputs()->size(); i++)
+ {
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+
+ auto output_name = output_node->name();
+ assert(_data_stage.find(output_name) != _data_stage.end());
+
+ auto tensor_data = _data_stage[output_name];
+ auto output_filename = output_file + std::to_string(i);
+
+ write_file(output_filename, tensor_data.data(), tensor_data.size());
+ save_shape(output_filename + ".shape", output_node);
+ }
+}
+
+} // namespace prunner
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_PRUNNER_PMODELS_RUNNER_H__
+#define __CIRCLE_PRUNNER_PMODELS_RUNNER_H__
+
+#include <crew/PConfig.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace prunner
+{
+
+using Buffer = std::vector<char>;
+
+using Buffers = std::map<std::string, Buffer>;
+
+using RunModel = std::string;
+
+using RunModels = std::vector<RunModel>;
+
+/**
+ * @brief PModelsRunner runs partitioned models from input data file and stores
+ * output data to a file
+ */
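+//
+// A typical call sequence, mirroring Driver.cpp (file names are illustrative):
+//   prunner::PModelsRunner runner;
+//   runner.load_config("model.conn.ini");
+//   runner.load_inputs("model.circle.input", 2);
+//   runner.run();
+//   runner.save_outputs("model.circle.output");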
+class PModelsRunner
+{
+public:
+ PModelsRunner() = default;
+
+public:
+ bool load_config(const std::string &filename);
+ void load_inputs(const std::string &input_prefix, int32_t num_inputs);
+ bool run(void);
+ void save_outputs(const std::string &output_file);
+
+private:
+ bool is_input_ready(const RunModel &model);
+
+private:
+ crew::PConfig _pconfig;
+ RunModels _models_to_run;
+ Buffers _data_stage;
+};
+
+} // namespace prunner
+
+#endif // __CIRCLE_PRUNNER_PMODELS_RUNNER_H__
--- /dev/null
+#
+# this project validates partitioned models produced by circle-partitioner
+# with circle-part-driver and two scripts: part_eval_all.sh and part_eval_one.py
+#
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+unset(RECIPE_LIST)
+unset(PARTITION_LIST)
+unset(TEST_DEPS)
+
+macro(add RECIPE_NAME PARTITION_NAME)
+ list(APPEND RECIPE_LIST ${RECIPE_NAME})
+ list(APPEND PARTITION_LIST ${PARTITION_NAME})
+endmacro(add)
+
+# Read "test.lst"
+include("test.lst")
+
+list(LENGTH RECIPE_LIST RECIPE_LENGTH)
+math(EXPR RECIPE_LENGTH_M1 "${RECIPE_LENGTH} - 1")
+
+foreach(IDX RANGE ${RECIPE_LENGTH_M1})
+ list(GET RECIPE_LIST ${IDX} RECIPE_NAME)
+ list(GET PARTITION_LIST ${IDX} PARTITION_NAME)
+
+ # NOTE about the name:
+ # Use '.recipe' name for source tflite and circle files
+ # Use '.part' name for actual test folder and test files
+
+ # Output to a folder
+ set(PARTITIONER_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PARTITION_NAME}")
+
+ add_custom_command(OUTPUT ${PARTITIONER_OUTPUT_PATH}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${PARTITIONER_OUTPUT_PATH}"
+ COMMENT "Make directory ${PARTITIONER_OUTPUT_PATH}"
+ )
+
+ # Copy tflite
+ set(TFLITE_SRC_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE_NAME}.tflite")
+ set(TFLITE_DST_PATH "${PARTITIONER_OUTPUT_PATH}/${PARTITION_NAME}.tflite")
+
+ add_custom_command(OUTPUT ${TFLITE_DST_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TFLITE_SRC_PATH}" "${TFLITE_DST_PATH}"
+ DEPENDS ${TFLITE_SRC_PATH}
+ COMMENT "Copy ${RECIPE_NAME}.tflite"
+ )
+ list(APPEND TEST_DEPS ${TFLITE_DST_PATH})
+
+ # Copy circle
+ set(CIRCLE_SRC_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE_NAME}.circle")
+ set(CIRCLE_DST_PATH "${PARTITIONER_OUTPUT_PATH}/${PARTITION_NAME}.circle")
+
+ add_custom_command(OUTPUT ${CIRCLE_DST_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${CIRCLE_SRC_PATH}" "${CIRCLE_DST_PATH}"
+ DEPENDS ${CIRCLE_SRC_PATH}
+ COMMENT "Copy ${RECIPE_NAME}.circle"
+ )
+ list(APPEND TEST_DEPS ${CIRCLE_DST_PATH})
+
+ # Copy .part
+ set(PART_FILE "${PARTITION_NAME}.part")
+ set(PART_SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}/parts/${PART_FILE}")
+ set(PART_DST_PATH "${PARTITIONER_OUTPUT_PATH}/${PART_FILE}")
+
+ add_custom_command(OUTPUT ${PART_DST_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${PART_SRC_PATH}" "${PART_DST_PATH}"
+ DEPENDS ${PART_SRC_PATH}
+ COMMENT "Copy ${PART_FILE}"
+ )
+ list(APPEND TEST_DEPS ${PART_DST_PATH})
+
+ # Partition connection file to generate
+ set(PARTITIONER_CONN_JSON "${PARTITIONER_OUTPUT_PATH}/${PARTITION_NAME}.conn.json")
+
+ # Run partitioner
+ add_custom_command(OUTPUT ${PARTITIONER_CONN_JSON}
+ COMMAND circle_partitioner "${PART_FILE}" "${PARTITION_NAME}.circle" "${PARTITIONER_OUTPUT_PATH}"
+ DEPENDS circle_partitioner ${PART_DST_PATH} ${CIRCLE_DST_PATH}
+    COMMENT "Partition ${RECIPE_NAME}.circle with ${PART_FILE}"
+ )
+ list(APPEND TEST_DEPS ${PARTITIONER_CONN_JSON})
+endforeach(IDX)
+
+add_custom_target(circle_part_value_test_prepare ALL DEPENDS ${TEST_DEPS})
+add_dependencies(circle_part_value_test_prepare common_artifacts_deps)
+
+# run evaluation
+add_test(NAME circle_part_value_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/part_eval_all.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${NNCC_OVERLAY_DIR}/venv_2_3_0"
+ "$<TARGET_FILE:circle_part_driver>"
+ ${PARTITION_LIST}
+)
--- /dev/null
+# circle-part-value-test
+
+_circle-part-value-test_ evaluates partitioned models produced by circle-partitioner.
+
+### Process of evaluation
+
+The evaluation process is similar to that of _luci-value-test_.
+
+1) generates random input and stores it to reference input file(s)
+2) executes the tflite file from common-artifacts for reference output
+3) partitions the circle file with the .part file and writes the partitioned models into the output folder
+4) executes the produced partitioned circle models with the reference input file(s), as shown below
+5) saves the output(s) of the circle models to file(s)
+6) compares the reference output with the saved output file(s)
+7) fails the test if the values differ
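+
+For reference, step 4) runs the driver once per test roughly as follows (file names follow the
+test naming convention and are illustrative):
+
+```
+circle_part_driver <name>.conn.ini <num_inputs> <name>.circle.input <name>.circle.output
+```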
--- /dev/null
+#!/bin/bash
+
+# This script verifies the basic behavior of circle-partitioner
+#
+# HOW TO USE
+#
+# ./part_eval_all.sh <path/to/work_dir> <path/to/venv_dir> <path/to/driver> <TEST 1> <TEST 2> ...
+#
+# work_dir : artifacts directory where test materials exist
+# venv_dir : python virtual environment home directory
+# driver   : path to circle-part-driver executable
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/part_eval_one.py"
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+CIRCLE_PART_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+  # for simplicity, the folder uses the same name as ${TESTCASE}
+ TESTCASE_FOLDER="${WORKDIR}/${TESTCASE}"
+
+ PASSED_TAG="${TESTCASE_FOLDER}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TESTCASE_FOLDER}.log" <(
+ exec 2>&1
+ set -ex
+
+ # chdir into the folder as ini has relative filename of the model
+ pushd ${TESTCASE_FOLDER}
+
+ source "${VIRTUALENV}/bin/activate"
+ "${VIRTUALENV}/bin/python" "${VERIFY_SCRIPT_PATH}" \
+ --driver "${CIRCLE_PART_DRIVER_PATH}" \
+ --name "${TESTCASE}"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+
+ popd
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
--- /dev/null
+#!/usr/bin/env python3
+import numpy as np
+import tensorflow as tf
+import subprocess
+import argparse
+import traceback
+
+#
+# This script compares the execution results of the TFLite interpreter and the
+# partitioned model(s) produced from a circle model
+#
+# Basic usage for example:
+# part_eval_one.py \
+# --driver build/compiler/circle-part-driver/circle-part-driver \
+# --name test_file
+#
+parser = argparse.ArgumentParser()
+parser.add_argument('--driver', type=str, required=True)
+parser.add_argument('--name', type=str, required=True)
+args = parser.parse_args()
+
+driver = args.driver
+tflite_model = args.name + ".tflite"
+circle_model = args.name + ".circle"
+partition_conn_ini = args.name + ".conn.ini"
+
+# Build TFLite interpreter.
+interpreter = tf.lite.Interpreter(tflite_model)
+interpreter.allocate_tensors()
+
+# Generate random input data.
+num_inputs = len(interpreter.get_input_details())
+for i in range(num_inputs):
+ input_details = interpreter.get_input_details()[i]
+ if input_details["dtype"] == np.float32:
+ input_data = np.array(
+ np.random.random_sample(input_details["shape"]), input_details["dtype"])
+ elif input_details["dtype"] == np.uint8:
+ input_data = np.array(
+ np.random.randint(0, 256, size=input_details["shape"]),
+ input_details["dtype"])
+ elif input_details["dtype"] == np.bool_:
+ input_data = np.array(
+ np.random.choice(a=[True, False], size=input_details["shape"]),
+ input_details["dtype"])
+ else:
+ raise SystemExit("Unsupported input dtype")
+
+ interpreter.set_tensor(input_details["index"], input_data)
+ input_data.tofile(circle_model + ".input" + str(i))
+
+# Do inference
+interpreter.invoke()
+
+# Execute circle-part-driver.
+partition_command = [
+ driver, partition_conn_ini,
+ str(num_inputs), circle_model + ".input", circle_model + ".output"
+]
+print("Run: ")
+for arg in partition_command:
+ print(" ", arg, "\\")
+print("", flush=True)
+
+subprocess.run(partition_command, check=True)
+
+# Compare the results.
+for idx in range(len(interpreter.get_output_details())):
+ output_details = interpreter.get_output_details()[idx]
+ output_data = np.fromfile(circle_model + ".output" + str(idx),
+ output_details["dtype"])
+ shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
+ output_shape = [int(i) for i in shape_file.read().split(',')]
+ luci_output_data = np.reshape(output_data, output_shape)
+ try:
+ if output_details["dtype"] == np.uint8:
+ if np.allclose(
+ luci_output_data,
+ interpreter.get_tensor(
+ interpreter.get_output_details()[idx]["index"]),
+ rtol=0,
+ atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.float32:
+ if np.allclose(
+ luci_output_data,
+ interpreter.get_tensor(
+ interpreter.get_output_details()[idx]["index"]),
+ rtol=1.e-5,
+ atol=1.e-5) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.int64:
+ if np.allclose(
+ luci_output_data,
+ interpreter.get_tensor(
+ interpreter.get_output_details()[idx]["index"]),
+ rtol=0,
+ atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.int32:
+ if np.allclose(
+ luci_output_data,
+ interpreter.get_tensor(
+ interpreter.get_output_details()[idx]["index"]),
+ rtol=0,
+ atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ else:
+ raise SystemExit("Unsupported data type: ", output_details["dtype"])
+ except:
+ print(traceback.format_exc())
+ quit(255)
+
+quit(0)
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SUB=acl_cl
+DIV=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+DIV=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+RSQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SUB=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+WWW=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
--- /dev/null
+require("common-artifacts")
+require("circle-partitioner")
+require("circle-part-driver")
--- /dev/null
+# Add recipe names from /res/TensorFlowLiteRecipes to test.
+# Only add items that exist in the common-artifacts test: tflite/circle files are copied as sources.
+#
+# add(RECIPE_NAME PARTITION_NAME)
+
+add(Part_Add_Sub_000 Part_Add_Sub_000)
+add(Part_Sqrt_Rsqrt_000 Part_Sqrt_Rsqrt_000)
+add(Part_Sqrt_Rsqrt_001 Part_Sqrt_Rsqrt_001)
+add(Part_Sqrt_Rsqrt_002 Part_Sqrt_Rsqrt_002)
+add(Part_Sqrt_Rsqrt_003 Part_Sqrt_Rsqrt_003)
+add(Part_Sqrt_Rsqrt_Add_000 Part_Sqrt_Rsqrt_Add_000)
+add(Part_Sqrt_Rsqrt_Add_001 Part_Sqrt_Rsqrt_Add_001)
+add(Part_Sqrt_Rsqrt_Add_002 Part_Sqrt_Rsqrt_Add_002)
+add(Part_Sqrt_Rsqrt_Add_003 Part_Sqrt_Rsqrt_Add_003)
+add(Part_Sqrt_Rsqrt_Add_004 Part_Sqrt_Rsqrt_Add_004)
+add(Part_Add_Sqrt_000 Part_Add_Sqrt_000)
+add(Part_Add_Sqrt_Rsqrt_000 Part_Add_Sqrt_Rsqrt_000)
+add(Net_InstanceNorm_003 Net_InstanceNorm_003)
+add(Net_InstanceNorm_003 Net_InstanceNorm_003.001)
+add(Net_InstanceNorm_003 Net_InstanceNorm_003.002)
--- /dev/null
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(circle_partitioner "${SOURCES}")
+target_link_libraries(circle_partitioner foder)
+target_link_libraries(circle_partitioner crew)
+target_link_libraries(circle_partitioner safemain)
+target_link_libraries(circle_partitioner luci_lang)
+target_link_libraries(circle_partitioner luci_log)
+target_link_libraries(circle_partitioner luci_import)
+target_link_libraries(circle_partitioner luci_service)
+target_link_libraries(circle_partitioner luci_export)
+target_link_libraries(circle_partitioner luci_partition)
+target_link_libraries(circle_partitioner arser)
+target_link_libraries(circle_partitioner vconone)
+target_link_libraries(circle_partitioner nncc_common)
+
+install(TARGETS circle_partitioner DESTINATION bin)
--- /dev/null
+# circle-partitioner
+
+_circle-partitioner_ partitions a circle model into two or more circle models.
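+
+## Example
+
+The sketch below is illustrative only (file and model names are made up). A partition
+file assigns operators to backends by OPCODE; every other operator falls back to the
+`default` backend:
+
+```ini
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=acl_cl
+```
+
+The driver takes three positional arguments, `partition`, `input` and `work`, where the
+partition file and the input circle model are both read from the work folder:
+
+```
+$ circle_partitioner Net_Add_000.part Net_Add_000.circle work_folder
+```
+
+`--backends` and `--default` can override the corresponding values of the partition file.
+The tool writes one circle model per partitioned group into the work folder, along with
+`.conn.json` and `.conn.ini` files that describe how the partitioned models connect.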
--- /dev/null
+require("foder")
+require("crew")
+require("safemain")
+require("luci")
+require("arser")
+require("vconone")
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionRead.h"
+#include "PartitionExport.h"
+#include "HelperPath.h"
+#include "HelperStrings.h"
+
+#include <foder/FileLoader.h>
+
+#include <luci/Importer.h>
+#include <luci/Service/Validate.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+#include <luci/Log.h>
+
+#include <arser/arser.h>
+#include <vconone/vconone.h>
+
+#include <iostream>
+#include <string>
+
+namespace
+{
+
+const char *opt_bks = "--backends";
+const char *opt_def = "--default";
+const char *opt_part = "partition";
+const char *opt_input = "input";
+const char *opt_work = "work";
+
+void print_version(void)
+{
+ std::cout << "circle-partitioner version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
+void build_arser(arser::Arser &arser)
+{
+ arser.add_argument("--version")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
+
+ arser.add_argument(opt_bks)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Backends in CSV to use for partitioning");
+
+ arser.add_argument(opt_def)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Default backend to assign");
+
+ arser.add_argument(opt_part)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Partition file which provides backend to assign");
+ arser.add_argument(opt_input)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Input circle model filename");
+ arser.add_argument(opt_work)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Work folder of partition, input files exist and output files are produced");
+}
+
+std::unique_ptr<luci::Module> load_model(const std::string &input_path)
+{
+ // Load model from the file
+ foder::FileLoader file_loader{input_path};
+ std::vector<char> model_data = file_loader.load();
+
+ // Verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+ return nullptr;
+ }
+
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+ if (circle_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ return nullptr;
+ }
+
+ // Import from input Circle file
+ luci::Importer importer;
+ return importer.importModule(circle_model);
+}
+
+bool validate_module(luci::Module *module)
+{
+ for (size_t g = 0; g < module->size(); ++g)
+ {
+ auto graph = module->graph(g);
+ if (!luci::validate(graph))
+ {
+ std::cerr << "ERROR: Invalid circle model" << std::endl;
+ return false;
+ }
+ if (!luci::validate_name(graph))
+ {
+ std::cerr << "ERROR: circle model has empty name" << std::endl;
+ return false;
+ }
+ }
+
+ if (!luci::validate_unique_name(module))
+ {
+ std::cerr << "ERROR: circle model has duplicate names" << std::endl;
+ return false;
+ }
+
+ return true;
+}
+
+bool validate_partition(luci::PartitionTable &partition)
+{
+ if (partition.groups.size() == 0)
+ {
+ std::cerr << "There is no 'backends' information";
+ return false;
+ }
+ if (partition.default_group.empty())
+ {
+ std::cerr << "There is no 'default' backend information";
+ return false;
+ }
+ if (!partee::is_one_of(partition.default_group, partition.groups))
+ {
+ std::cerr << "'default' backend is not one of 'backends' item";
+ return false;
+ }
+ for (auto &byopcode : partition.byopcodes)
+ {
+ if (!partee::is_one_of(byopcode.second, partition.groups))
+ {
+ std::cerr << "OPCODE " << byopcode.first << " is not assigned to one of 'backends' items";
+ return false;
+ }
+ }
+ return true;
+}
+
+void dump(std::ostream &os, const luci::PartitionTable &table)
+{
+ os << "Backends:";
+ for (auto &group : table.groups)
+ {
+ os << " " << group;
+ if (table.default_group == group)
+ os << "(default)";
+ }
+ os << std::endl;
+
+ os << "Assign by OPCODE: " << std::endl;
+ for (auto &item : table.byopcodes)
+ os << " " << item.first << "=" << item.second << std::endl;
+}
+
+std::ostream &operator<<(std::ostream &os, const luci::PartitionTable &table)
+{
+ dump(os, table);
+ return os;
+}
+
+} // namespace
+
+int entry(int argc, char **argv)
+{
+ LOGGER(l);
+
+ arser::Arser arser("circle-partitioner provides circle model partitioning");
+
+ build_arser(arser);
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ std::cerr << arser;
+ return EXIT_FAILURE;
+ }
+
+ std::string partition_file = arser.get<std::string>(opt_part);
+ std::string input_file = arser.get<std::string>(opt_input);
+ std::string work_folder = arser.get<std::string>(opt_work);
+
+ std::string partition_path = work_folder + "/" + partition_file;
+ std::string input_path = work_folder + "/" + input_file;
+
+ auto module = load_model(input_path);
+ if (module.get() == nullptr)
+ {
+ return EXIT_FAILURE;
+ }
+ if (!validate_module(module.get()))
+ {
+ return EXIT_FAILURE;
+ }
+
+ // Read partition information
+ INFO(l) << "--- Read PartitionConfig-----------------------" << std::endl;
+ auto partition = partee::read(partition_path);
+ INFO(l) << partition << std::endl;
+
+ // override with command line arguments
+ {
+ if (arser[opt_bks])
+ {
+ auto backend_backends = arser.get<std::string>(opt_bks);
+ partition.groups = partee::csv_to_vector<std::string>(backend_backends);
+ }
+ if (arser[opt_def])
+ {
+ partition.default_group = arser.get<std::string>(opt_def);
+ }
+ }
+ if (!validate_partition(partition))
+ {
+ return EXIT_FAILURE;
+ }
+
+ INFO(l) << "--- PartitionConfig final----------------------" << std::endl;
+ INFO(l) << partition << std::endl;
+
+ // apply partition to module
+ auto pms = luci::apply(module.get(), partition);
+
+ // validate partitioned modules
+ for (auto &pmodule : pms.pmodules)
+ {
+ for (size_t g = 0; g < pmodule.module->size(); ++g)
+ {
+ auto graph = pmodule.module->graph(g);
+ if (graph == nullptr)
+ {
+ std::cerr << "ERROR: Failed to create partition model" << std::endl;
+ return EXIT_FAILURE;
+ }
+ if (!luci::validate(graph))
+ {
+ std::cerr << "ERROR: Failed to create partition model" << std::endl;
+ return EXIT_FAILURE;
+ }
+ }
+ }
+
+ INFO(l) << "--- Partition Export---------------------------" << std::endl;
+ uint32_t idx = 1;
+ for (auto &pmodule : pms.pmodules)
+ {
+ // Export to output circle file
+ luci::CircleExporter exporter;
+
+ auto output_path = partee::make_path(work_folder, input_path, idx, pmodule.group);
+ pmodule.name = partee::get_filename_ext(output_path);
+ INFO(l) << "--- " << output_path << ": " << pmodule.name << std::endl;
+
+ luci::CircleFileExpContract contract(pmodule.module.get(), output_path);
+ if (!exporter.invoke(&contract))
+ {
+ std::cerr << "ERROR: Failed to export '" << output_path << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+ idx++;
+ }
+
+ INFO(l) << "--- Partition connection information-----------" << std::endl;
+ if (!partee::export_part_conn_json(work_folder, input_file, module.get(), pms))
+ {
+ return EXIT_FAILURE;
+ }
+ if (!partee::export_part_conn_ini(work_folder, input_file, module.get(), pms))
+ {
+ return EXIT_FAILURE;
+ }
+
+ INFO(l) << "--- Partition done-----------------------------" << std::endl << std::endl;
+
+ return EXIT_SUCCESS;
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HelperPath.h"
+
+#include <cassert>
+#include <sstream>
+#include <stdlib.h>
+
+namespace partee
+{
+
+bool make_dir(const std::string &path)
+{
+ std::string command("mkdir -p ");
+ command += path;
+ int ret = ::system(command.c_str());
+ return ret == 0;
+}
+
+std::string get_filename_ext(const std::string &base)
+{
+ // find last '/' to get filename.ext
+ auto pos = base.find_last_of("/");
+ if (pos == std::string::npos)
+ return base;
+
+ return base.substr(pos + 1);
+}
+
+std::string make_path(const std::string &base, const std::string &input, uint32_t idx,
+ const std::string &backend)
+{
+ auto filename_ext = get_filename_ext(input);
+
+ // We will assume file type .circle if not given
+ // TODO maybe throw if there is no extension?
+ std::string filename = filename_ext;
+ std::string ext = "circle";
+
+ auto pos = filename_ext.find_last_of(".");
+ if (pos != std::string::npos)
+ {
+ filename = filename_ext.substr(0, pos);
+ ext = filename_ext.substr(pos + 1);
+ }
+
+ // format idx with 5 '0' paddings like '00123'
+ uint32_t length = 5;
+ auto seq = std::string(length, '0').append(std::to_string(idx));
+ auto seq_fmt = seq.substr(seq.size() - length);
+
+ return base + "/" + filename + "." + seq_fmt + "_" + backend + "." + ext;
+}
+
+} // namespace partee
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_HELPER_PATH_H__
+#define __CIRCLE_HELPER_PATH_H__
+
+#include <string>
+
+namespace partee
+{
+
+/**
+ * @brief create folder
+ */
+bool make_dir(const std::string &path);
+
+/**
+ * @brief get filename part of base
+ */
+std::string get_filename_ext(const std::string &base);
+
+/**
+ * @brief Make file path from base and backend
+ */
+std::string make_path(const std::string &base, const std::string &input, uint32_t idx,
+ const std::string &backend);
+
+} // namespace partee
+
+#endif // __CIRCLE_HELPER_PATH_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HelperStrings.h"
+
+#include <algorithm>
+#include <sstream>
+
+namespace partee
+{
+
+template <> std::vector<std::string> csv_to_vector(const std::string &str)
+{
+ std::vector<std::string> ret;
+ std::istringstream is(str);
+ for (std::string item; std::getline(is, item, ',');)
+ {
+ ret.push_back(item);
+ }
+ return ret;
+}
+
+bool is_one_of(const std::string &item, const std::vector<std::string> &items)
+{
+ return std::find(items.begin(), items.end(), item) != items.end();
+}
+
+} // namespace partee
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_HELPER_STRINGS_H__
+#define __CIRCLE_HELPER_STRINGS_H__
+
+#include <string>
+#include <vector>
+
+namespace partee
+{
+
+template <typename T> std::vector<T> csv_to_vector(const std::string &str);
+
+bool is_one_of(const std::string &item, const std::vector<std::string> &items);
+
+} // namespace partee
+
+#endif // __CIRCLE_HELPER_STRINGS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionExport.h"
+#include "HelperPath.h"
+
+#include <crew/PConfig.h>
+
+#include <cassert>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+std::string export_file_path(const std::string &output_base, const std::string &input,
+ const std::string &ext)
+{
+ auto filename_ext = partee::get_filename_ext(input);
+ auto pos = filename_ext.find_last_of(".");
+ assert(pos > 0);
+ auto filename = filename_ext.substr(0, pos);
+ auto filepath = output_base + "/" + filename + ".conn" + ext;
+ return filepath;
+}
+
+} // namespace
+
+namespace
+{
+
+void graph_io_to_config_part(loco::Graph *graph, crew::Part &part)
+{
+ assert(graph != nullptr);
+
+ auto *gis = graph->inputs();
+ auto *gos = graph->outputs();
+ for (uint32_t i = 0; i < gis->size(); ++i)
+ {
+ auto *gi = gis->at(i);
+ assert(gi != nullptr);
+ part.inputs.push_back(gi->name());
+ }
+ for (uint32_t i = 0; i < gos->size(); ++i)
+ {
+ auto *go = gos->at(i);
+ assert(go != nullptr);
+ part.outputs.push_back(go->name());
+ }
+}
+
+void pms2config(const luci::PartedModules &pms, crew::PConfig &pconfig)
+{
+ for (auto &pmodule : pms.pmodules)
+ {
+ auto *graph = pmodule.module->graph();
+
+ crew::Part part;
+ part.model_file = pmodule.name;
+ graph_io_to_config_part(graph, part);
+
+ pconfig.parts.push_back(part);
+ }
+}
+
+} // namespace
+
+namespace partee
+{
+
+bool export_part_conn_json(const std::string &output_base, const std::string &input,
+ const luci::Module *source, luci::PartedModules &pms)
+{
+ crew::PConfig pconfig;
+
+ // TODO is using graph I/O of the main graph enough?
+ auto *graph = source->graph();
+
+ pconfig.source.model_file = input;
+ graph_io_to_config_part(graph, pconfig.source);
+
+ pms2config(pms, pconfig);
+
+ auto filepath_json = export_file_path(output_base, input, ".json");
+ std::ofstream fs(filepath_json.c_str(), std::ofstream::binary | std::ofstream::trunc);
+ if (not fs.good())
+ {
+ std::cerr << "ERROR: Failed to create file: " << filepath_json;
+ return false;
+ }
+ if (not write_json(fs, pconfig))
+ {
+ std::cerr << "ERROR: Failed to write json file: " << filepath_json;
+ return false;
+ }
+ fs.close();
+
+ return true;
+}
+
+bool export_part_conn_ini(const std::string &output_base, const std::string &input,
+ const luci::Module *source, luci::PartedModules &pms)
+{
+ crew::PConfig pconfig;
+
+ // TODO is using graph I/O of the main graph enough?
+ auto *graph = source->graph();
+
+ pconfig.source.model_file = input;
+ graph_io_to_config_part(graph, pconfig.source);
+
+ pms2config(pms, pconfig);
+
+ auto filepath_ini = export_file_path(output_base, input, ".ini");
+ std::ofstream fs(filepath_ini.c_str(), std::ofstream::binary | std::ofstream::trunc);
+ if (not fs.good())
+ {
+ std::cerr << "ERROR: Failed to create file: " << filepath_ini;
+ return false;
+ }
+ if (not write_ini(fs, pconfig))
+ {
+ std::cerr << "ERROR: Failed to write ini file: " << filepath_ini;
+ return false;
+ }
+ fs.close();
+
+ return true;
+}
+
+} // namespace partee
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_PARTITION_EXPORT_H__
+#define __CIRCLE_PARTITION_EXPORT_H__
+
+#include <luci/Partition.h>
+
+#include <string>
+
+namespace partee
+{
+
+/**
+ * @brief This will save partition connection to json format file
+ */
+bool export_part_conn_json(const std::string &output_base, const std::string &input,
+ const luci::Module *source, luci::PartedModules &pms);
+
+/**
+ * @brief This will save partition connection to ini format file
+ */
+bool export_part_conn_ini(const std::string &output_base, const std::string &input,
+ const luci::Module *source, luci::PartedModules &pms);
+
+} // namespace partee
+
+#endif // __CIRCLE_PARTITION_EXPORT_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionRead.h"
+#include "HelperStrings.h"
+
+#include <crew/PConfigIni.h>
+#include <crew/PConfigIniDump.h>
+#include <luci/Log.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+using namespace partee;
+
+const char *_section_partition = "partition";
+const char *_section_OPCODE = "OPCODE";
+
+const char *_key_backends = "backends";
+const char *_key_default = "default";
+const char *_key_underscore = "_";
+
+luci::PartitionTable parse_table(const crew::Sections &sections)
+{
+ luci::PartitionTable table;
+
+ for (auto &section : sections)
+ {
+ if (section.name == _section_partition)
+ {
+ auto &items = section.items;
+ if (items.find(_key_backends) == items.end())
+ {
+ throw std::invalid_argument("'backends' is required");
+ }
+ if (items.find(_key_default) == items.end())
+ {
+ throw std::invalid_argument("'default' is required");
+ }
+
+ table.groups = csv_to_vector<std::string>(items.at(_key_backends));
+ table.default_group = items.at(_key_default);
+ }
+ else if (section.name == _section_OPCODE)
+ {
+ auto &items = section.items;
+
+ for (auto &item : items)
+ {
+ if (item.first == _key_underscore)
+ table.default_group = item.second;
+ else
+ {
+ table.byopcodes.emplace(item.first, item.second);
+ }
+ }
+ }
+ }
+
+ return table;
+}
+
+} // namespace
+
+namespace partee
+{
+
+luci::PartitionTable read(const std::string &path)
+{
+ LOGGER(l);
+
+ INFO(l) << "PartitionConfig: " << path << std::endl;
+
+ auto partition_config = crew::read_ini(path);
+
+ INFO(l) << partition_config << std::endl;
+
+ auto partition_table = parse_table(partition_config);
+
+ return partition_table;
+}
+
+} // namespace partee
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_PARTITION_READ_H__
+#define __CIRCLE_PARTITION_READ_H__
+
+#include <luci/IR/Module.h>
+#include <luci/Partition.h>
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace partee
+{
+
+/**
+ * @brief Read and parse the partition file and return a PartitionTable
+ */
+luci::PartitionTable read(const std::string &path);
+
+} // namespace partee
+
+#endif // __CIRCLE_PARTITION_READ_H__
target_link_libraries(circle-quantizer luci_service)
target_link_libraries(circle-quantizer luci_pass)
target_link_libraries(circle-quantizer luci_export)
+target_link_libraries(circle-quantizer luci_env)
target_link_libraries(circle-quantizer arser)
target_link_libraries(circle-quantizer vconone)
#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>
+#include <luci/UserSettings.h>
#include <oops/InternalExn.h>
#include <arser/arser.h>
luci::CircleOptimizer optimizer;
auto options = optimizer.options();
+ auto settings = luci::UserSettings::settings();
const std::string qdqw = "--quantize_dequantize_weights";
const std::string qwmm = "--quantize_with_minmax";
const std::string rq = "--requantize";
+ const std::string gpd = "--generate_profile_data";
+
arser::Arser arser("circle-quantizer provides circle model quantization");
arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
arser.add_argument(qdqw)
- .nargs(3)
- .type(arser::DataType::STR_VEC)
- .required(false)
- .help("Quantize-dequantize weight values required action before quantization. "
- "Three arguments required: input_dtype(float32) "
- "output_dtype(uint8) granularity(layer, channel)");
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .required(false)
+ .help("Quantize-dequantize weight values required action before quantization. "
+ "Three arguments required: input_dtype(float32) "
+ "output_dtype(uint8) granularity(layer, channel)");
arser.add_argument(qwmm)
- .nargs(3)
- .type(arser::DataType::STR_VEC)
- .required(false)
- .help("Quantize with min/max values. "
- "Three arguments required: input_dtype(float32) "
- "output_dtype(uint8) granularity(layer, channel)");
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .required(false)
+ .help("Quantize with min/max values. "
+ "Three arguments required: input_dtype(float32) "
+ "output_dtype(uint8) granularity(layer, channel)");
arser.add_argument(rq)
- .nargs(2)
- .type(arser::DataType::STR_VEC)
- .required(false)
- .help("Requantize a quantized model. "
- "Two arguments required: input_dtype(int8) "
- "output_dtype(uint8)");
+ .nargs(2)
+ .type(arser::DataType::STR_VEC)
+ .required(false)
+ .help("Requantize a quantized model. "
+ "Two arguments required: input_dtype(int8) "
+ "output_dtype(uint8)");
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+ arser.add_argument(gpd).nargs(0).required(false).default_value(false).help(
+ "This will turn on profiling data generation.");
+
try
{
arser.parse(argc, argv);
return 255;
}
- if (arser[qdqw])
{
- if (arser[qwmm] || arser[rq])
+ // only one of the qdqw, qwmm, rq options can be used
+ int32_t opt_used = arser[qdqw] ? 1 : 0;
+ opt_used += arser[qwmm] ? 1 : 0;
+ opt_used += arser[rq] ? 1 : 0;
+ if (opt_used != 1)
{
print_exclusive_options();
return 255;
}
+ }
+
+ if (arser[qdqw])
+ {
auto values = arser.get<std::vector<std::string>>(qdqw);
if (values.size() != 3)
{
if (arser[qwmm])
{
- if (arser[qdqw] || arser[rq])
- {
- print_exclusive_options();
- return 255;
- }
auto values = arser.get<std::vector<std::string>>(qwmm);
if (values.size() != 3)
{
if (arser[rq])
{
- if (arser[qwmm] || arser[qdqw])
- {
- print_exclusive_options();
- return 255;
- }
auto values = arser.get<std::vector<std::string>>(rq);
if (values.size() != 2)
{
std::string input_path = arser.get<std::string>("input");
std::string output_path = arser.get<std::string>("output");
+ if (arser[gpd])
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, true);
+
// Load model from the file
foder::FileLoader file_loader{input_path};
std::vector<char> model_data = file_loader.load();
int entry(int argc, char **argv)
{
arser::Arser arser{
- "circle-tensordump allows users to retrieve tensor information from a Circle model file"};
+ "circle-tensordump allows users to retrieve tensor information from a Circle model file"};
arser.add_argument("circle").nargs(1).type(arser::DataType::STR).help("Circle file path to dump");
arser.add_argument("--tensors").nargs(0).help("Dump to console");
arser.add_argument("--tensors_to_hdf5")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Dump to hdf5 file. Specify hdf5 file path to be dumped");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Dump to hdf5 file. Specify hdf5 file path to be dumped");
try
{
return;
auto dataspace = std::make_unique<H5::DataSpace>(dims.size(), dims.data());
auto dataset = std::make_unique<H5::DataSet>(
- file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
+ file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
dataset->write(data->data(), type);
}
{
auto dataspace = std::make_unique<H5::DataSpace>(H5S_SCALAR);
auto dataset = std::make_unique<H5::DataSet>(
- file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
+ file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
dataset->write(&data, type);
}
// create a group for each tensor whose name is its tensor name
std::string group_name = ::mangle(tensor->name()->c_str());
std::unique_ptr<H5::Group> tensor_group =
- std::make_unique<H5::Group>(file.createGroup(group_name));
+ std::make_unique<H5::Group>(file.createGroup(group_name));
// write a buffer data
uint32_t buff_idx = tensor->buffer();
## TFLITE RECIPE
Add(Net_Preactivation_BN_000 PASS fuse_preactivation_batchnorm)
+Add(Net_BroadcastTo_AddV2_000 PASS resolve_customop_add)
+Add(Net_BroadcastTo_AddV2_001 PASS resolve_customop_add)
+Add(Net_Conv_Add_Mul_000 PASS fuse_batchnorm_with_conv)
+Add(Net_Conv_Add_Mul_001 PASS fuse_batchnorm_with_conv)
+Add(Net_Conv_Add_Mul_002 PASS fuse_batchnorm_with_conv)
+Add(Net_Conv_Min_Max_000 PASS transform_min_max_to_relu6)
+Add(Net_Conv_Relu6_000 PASS fuse_activation_function)
+Add(Net_DwConv_BN_000 PASS fuse_batchnorm_with_dwconv)
+Add(Net_DwConv_BN_001 PASS fuse_batchnorm_with_dwconv)
+Add(Net_Reshape_Reshape_000 PASS remove_redundant_reshape)
+Add(Net_Squeeze_Squeeze_000 PASS substitute_squeeze_to_reshape)
Add(Net_TConv_Add_000 PASS fuse_add_with_tconv)
Add(Net_TConv_Add_001 PASS fuse_add_with_tconv)
Add(Net_TConv_Add_002 PASS fuse_add_with_tconv)
Add(Net_TConv_BN_000 PASS fuse_batchnorm_with_tconv)
Add(Net_TConv_BN_001 PASS fuse_batchnorm_with_tconv)
+Add(Net_TConv_BN_002 PASS fuse_batchnorm_with_tconv)
Add(Net_InstanceNorm_001 PASS fuse_instnorm)
Add(Net_InstanceNorm_002 PASS fuse_instnorm)
Add(Net_InstanceNorm_003 PASS fuse_instnorm)
+Add(Net_Maximum_Minimum_000 PASS transform_min_max_to_relu6)
Add(BatchMatMulV2_000 PASS resolve_customop_batchmatmul)
Add(MatMul_000 PASS resolve_customop_matmul)
Add(DepthwiseConv2D_003 PASS)
arser::Arser arser("circle2circle provides circle model optimization and transformations");
arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
-
- arser.add_argument("--all").nargs(0).required(false).default_value(false).help(
- "Enable all optimize options");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
+
+ arser.add_argument("--O1").nargs(0).required(false).default_value(false).help(
+ "Enable O1 optimize options");
+
+ arser.add_argument("--fold_add_v2")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fold AddV2 operators with constant inputs");
+
+ arser.add_argument("--fold_cast")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fold Cast operators with constant input");
arser.add_argument("--fold_dequantize")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fold dequantize op");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fold dequantize op");
+
+ arser.add_argument("--fold_sparse_to_dense")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fold SparseToDense operator");
+
+ arser.add_argument("--forward_reshape_to_unaryop")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will move Reshape after UnaryOp for centain condition");
arser.add_argument("--fuse_activation_function")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse Activation function to a preceding operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse Activation function to a preceding operator");
arser.add_argument("--fuse_add_with_tconv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse Add operator to Transposed Convolution operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse Add operator to Transposed Convolution operator");
+
+ arser.add_argument("--fuse_batchnorm_with_conv")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse BatchNorm operators to Convolution operator");
+
+ arser.add_argument("--fuse_batchnorm_with_dwconv")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse BatchNorm operators to Depthwise Convolution operator");
arser.add_argument("--fuse_batchnorm_with_tconv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse BatchNorm operators to Transposed Convolution operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse BatchNorm operators to Transposed Convolution operator");
arser.add_argument("--fuse_bcq")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse operators and apply Binary Coded Quantization");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse operators and apply Binary Coded Quantization");
arser.add_argument("--fuse_instnorm")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse operators to InstanceNorm operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse operators to InstanceNorm operator");
arser.add_argument("--make_batchnorm_gamma_positive")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will make negative gamma of BatchNorm into a small positive value (1e-10). Note "
- "that this pass can change the execution result of the model. So, use it only when the "
- "impact is known to be acceptable.");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will make negative gamma of BatchNorm into a small positive value (1e-10). Note "
+ "that this pass can change the execution result of the model. So, use it only when the "
+ "impact is known to be acceptable.");
arser.add_argument("--fuse_preactivation_batchnorm")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse BatchNorm operators of pre-activations to Convolution operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse BatchNorm operators of pre-activations to Convolution operator");
+
+ arser.add_argument("--remove_redundant_reshape")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse or remove subsequent Reshape operators");
arser.add_argument("--remove_redundant_transpose")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse or remove subsequent Transpose operators");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse or remove subsequent Transpose operators");
+
+ arser.add_argument("--remove_unnecessary_reshape")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will remove unnecessary reshape operators");
+
+ arser.add_argument("--remove_unnecessary_slice")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will remove unnecessary slice operators");
+
+ arser.add_argument("--remove_unnecessary_strided_slice")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will remove unnecessary strided slice operators");
+
+ arser.add_argument("--remove_unnecessary_split")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will remove unnecessary split operators");
arser.add_argument("--replace_cw_mul_add_with_depthwise_conv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will replace channel-wise mul/add with DepthwiseConv2D operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will replace channel-wise mul/add with DepthwiseConv2D operator");
arser.add_argument("--resolve_customop_add")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(Add) to Add operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert Custom(Add) to Add operator");
arser.add_argument("--resolve_customop_batchmatmul")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(BatchMatmul) to BatchMatmul operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert Custom(BatchMatmul) to BatchMatmul operator");
arser.add_argument("--resolve_customop_matmul")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(Matmul) to Matmul operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert Custom(Matmul) to Matmul operator");
arser.add_argument("--shuffle_weight_to_16x1float32")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that "
- "it only converts weights whose row is a multiple of 16");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that "
+ "it only converts weights whose row is a multiple of 16");
arser.add_argument("--substitute_pack_to_reshape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert single input Pack to Reshape");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert single input Pack to Reshape");
+
+ arser.add_argument("--substitute_squeeze_to_reshape")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert certain condition Squeeze to Reshape");
+
+ arser.add_argument("--substitute_transpose_to_reshape")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert single input Transpose to Reshape");
+
+ arser.add_argument("--convert_nchw_to_nhwc")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Experimental: This will convert NCHW operators to NHWC under the assumption that "
+ "input model is NCHW.");
+
+ arser.add_argument("--nchw_to_nhwc_preserve_input_shape")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Preserve the input shape of the model (argument for --convert_nchw_to_nhwc).");
+
+ arser.add_argument("--nchw_to_nhwc_preserve_output_shape")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Preserve the output shape of the model (argument for --convert_nchw_to_nhwc).");
+
+ arser.add_argument("--transform_min_max_to_relu6")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Transform Minimum-Maximum pattern to Relu6 operator");
arser.add_argument("--mute_warnings")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will turn off warning messages");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will turn off warning messages");
arser.add_argument("--disable_validation")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will turn off operator validations. May help input model investigation.");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will turn off operator validations. May help input model investigation.");
+
+ arser.add_argument("--generate_profile_data")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will turn on profiling data generation.");
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
// sparsification argument
arser.add_argument("--sparsify_tensor")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Tensor name that you want to sparsify");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Tensor name that you want to sparsify");
arser.add_argument("--sparsify_traversal_order")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .default_value("0,1,2,3")
- .help("Traversal order of dimensions. Default value: 0,1,2,3");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .default_value("0,1,2,3")
+ .help("Traversal order of dimensions. Default value: 0,1,2,3");
arser.add_argument("--sparsify_format")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .default_value("d,s")
- .help("Format of each dimension. 'd' stands for dense, 's' stands for sparse(CSR). Default "
- "value: d,s");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .default_value("d,s")
+ .help("Format of each dimension. 'd' stands for dense, 's' stands for sparse(CSR). Default "
+ "value: d,s");
arser.add_argument("--sparsify_block_size")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Size of each block dimension");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Size of each block dimension");
arser.add_argument("--sparsify_block_map")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .default_value("0,1")
- .help("Map from block dimension to the original tensor dimension. Default value: 0,1");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .default_value("0,1")
+ .help("Map from block dimension to the original tensor dimension. Default value: 0,1");
try
{
return 255;
}
- if (arser.get<bool>("--all"))
+ if (arser.get<bool>("--O1"))
{
options->enable(Algorithms::FuseBCQ);
options->enable(Algorithms::FuseInstanceNorm);
options->enable(Algorithms::RemoveRedundantTranspose);
options->enable(Algorithms::SubstitutePackToReshape);
}
+ if (arser.get<bool>("--fold_add_v2"))
+ options->enable(Algorithms::FoldAddV2);
+ if (arser.get<bool>("--fold_cast"))
+ options->enable(Algorithms::FoldCast);
if (arser.get<bool>("--fold_dequantize"))
options->enable(Algorithms::FoldDequantize);
+ if (arser.get<bool>("--fold_sparse_to_dense"))
+ options->enable(Algorithms::FoldSparseToDense);
+ if (arser.get<bool>("--forward_reshape_to_unaryop"))
+ options->enable(Algorithms::ForwardReshapeToUnaryOp);
if (arser.get<bool>("--fuse_activation_function"))
options->enable(Algorithms::FuseActivationFunction);
+ if (arser.get<bool>("--fuse_batchnorm_with_conv"))
+ options->enable(Algorithms::FuseBatchNormWithConv);
if (arser.get<bool>("--fuse_add_with_tconv"))
options->enable(Algorithms::FuseAddWithTConv);
+ if (arser.get<bool>("--fuse_batchnorm_with_dwconv"))
+ options->enable(Algorithms::FuseBatchNormWithDwConv);
if (arser.get<bool>("--fuse_batchnorm_with_tconv"))
options->enable(Algorithms::FuseBatchNormWithTConv);
if (arser.get<bool>("--fuse_bcq"))
options->enable(Algorithms::MakeBatchNormGammaPositive);
if (arser.get<bool>("--fuse_preactivation_batchnorm"))
options->enable(Algorithms::FusePreActivationBatchNorm);
+ if (arser.get<bool>("--remove_redundant_reshape"))
+ options->enable(Algorithms::RemoveRedundantReshape);
if (arser.get<bool>("--remove_redundant_transpose"))
options->enable(Algorithms::RemoveRedundantTranspose);
+ if (arser.get<bool>("--remove_unnecessary_reshape"))
+ options->enable(Algorithms::RemoveUnnecessaryReshape);
+ if (arser.get<bool>("--remove_unnecessary_slice"))
+ options->enable(Algorithms::RemoveUnnecessarySlice);
+ if (arser.get<bool>("--remove_unnecessary_strided_slice"))
+ options->enable(Algorithms::RemoveUnnecessaryStridedSlice);
+ if (arser.get<bool>("--remove_unnecessary_split"))
+ options->enable(Algorithms::RemoveUnnecessarySplit);
if (arser.get<bool>("--replace_cw_mul_add_with_depthwise_conv"))
options->enable(Algorithms::ReplaceMulAddWithDepthwiseConv);
if (arser.get<bool>("--resolve_customop_add"))
options->enable(Algorithms::ShuffleWeightTo16x1Float32);
if (arser.get<bool>("--substitute_pack_to_reshape"))
options->enable(Algorithms::SubstitutePackToReshape);
+ if (arser.get<bool>("--substitute_squeeze_to_reshape"))
+ options->enable(Algorithms::SubstituteSqueezeToReshape);
+ if (arser.get<bool>("--substitute_transpose_to_reshape"))
+ options->enable(Algorithms::SubstituteTransposeToReshape);
+ if (arser.get<bool>("--transform_min_max_to_relu6"))
+ options->enable(Algorithms::TransformMinMaxToRelu6Pass);
if (arser.get<bool>("--mute_warnings"))
settings->set(luci::UserSettings::Key::MuteWarnings, true);
if (arser.get<bool>("--disable_validation"))
settings->set(luci::UserSettings::Key::DisableValidation, true);
+ if (arser.get<bool>("--generate_profile_data"))
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, true);
std::string input_path = arser.get<std::string>("input");
std::string output_path = arser.get<std::string>("output");
arser.get<std::string>("--sparsify_block_map"));
}
+ if (arser.get<bool>("--convert_nchw_to_nhwc"))
+ {
+ options->enable(Algorithms::ConvertNCHWToNHWC);
+ if (arser.get<bool>("--nchw_to_nhwc_preserve_input_shape"))
+ options->param(AlgorithmParameters::NCHW_to_NHWC_preserve_input_shape, "true");
+ if (arser.get<bool>("--nchw_to_nhwc_preserve_output_shape"))
+ options->param(AlgorithmParameters::NCHW_to_NHWC_preserve_output_shape, "true");
+ }
+
// Load model from the file
foder::FileLoader file_loader{input_path};
std::vector<char> model_data;
{
assert(_ptr < N);
_argv[_ptr] = new char[strlen(in) + 1];
- strcpy(_argv[_ptr], in);
+ strncpy(_argv[_ptr], in, strlen(in) + 1);
_ptr++;
}
private:
pchar_t _argv[N] = {
- nullptr,
+ nullptr,
};
size_t _ptr = 0;
};
target_include_directories(circlechef_circle PRIVATE src)
target_link_libraries(circlechef_circle circlechef_proto)
target_link_libraries(circlechef_circle mio_circle)
-target_link_libraries(circlechef_circle stdex)
target_link_libraries(circlechef_circle cwrap)
target_link_libraries(circlechef_circle souschef)
return circlechef::UINT8;
case circle::TensorType_BOOL:
return circlechef::BOOL;
+ case circle::TensorType_INT16:
+ return circlechef::INT16;
// TODO handle other types
// TensorType_FLOAT16
// TensorType_STRING
- // TensorType_INT16
// TensorType_COMPLEX64
default:
throw std::runtime_error{"unsupported tensor type"};
file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
add_library(circlechef_core STATIC ${SOURCES})
target_include_directories(circlechef_core PUBLIC include)
target_include_directories(circlechef_core PRIVATE src)
-target_link_libraries(circlechef_core circlechef_proto)
-target_link_libraries(circlechef_core circlechef_log)
-target_link_libraries(circlechef_core mio_circle)
-target_link_libraries(circlechef_core souschef)
+target_link_libraries(circlechef_core PUBLIC circlechef_proto)
+target_link_libraries(circlechef_core PUBLIC circlechef_log)
+target_link_libraries(circlechef_core PUBLIC mio_circle)
+target_link_libraries(circlechef_core PUBLIC souschef)
+target_link_libraries(circlechef_core PRIVATE nncc_coverage)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(circlechef_core_test ${TESTS})
+target_include_directories(circlechef_core_test PRIVATE src)
+target_link_libraries(circlechef_core_test circlechef_core)
+target_link_libraries(circlechef_core_test nncc_coverage)
return circle::TensorType_INT64;
case circlechef::BOOL:
return circle::TensorType_BOOL;
+ case circlechef::INT16:
+ return circle::TensorType_INT16;
default:
break;
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+#include <gtest/gtest.h>
+
+TEST(ConvertTest, as_circle_padding)
+{
+ ASSERT_EQ(circle::Padding_SAME, as_circle_padding(circlechef::SAME));
+ ASSERT_EQ(circle::Padding_VALID, as_circle_padding(circlechef::VALID));
+}
+
+TEST(ConvertTest, as_circle_padding_NEG)
+{
+ EXPECT_THROW(as_circle_padding(static_cast<circlechef::Padding>(99)), std::runtime_error);
+}
+
+TEST(ConvertTest, as_circle_activation)
+{
+ ASSERT_EQ(circle::ActivationFunctionType_NONE, as_circle_activation(circlechef::NONE));
+ ASSERT_EQ(circle::ActivationFunctionType_RELU, as_circle_activation(circlechef::RELU));
+ ASSERT_EQ(circle::ActivationFunctionType_RELU6, as_circle_activation(circlechef::RELU6));
+}
+
+TEST(ConvertTest, as_circle_activation_NEG)
+{
+ EXPECT_THROW(as_circle_activation(static_cast<circlechef::Activation>(99)), std::runtime_error);
+}
+
+TEST(ConvertTest, as_circle_tensortype)
+{
+ ASSERT_EQ(circle::TensorType_FLOAT32, as_circle_tensortype(circlechef::FLOAT32));
+ ASSERT_EQ(circle::TensorType_INT32, as_circle_tensortype(circlechef::INT32));
+ ASSERT_EQ(circle::TensorType_UINT8, as_circle_tensortype(circlechef::UINT8));
+ ASSERT_EQ(circle::TensorType_INT64, as_circle_tensortype(circlechef::INT64));
+ ASSERT_EQ(circle::TensorType_BOOL, as_circle_tensortype(circlechef::BOOL));
+ ASSERT_EQ(circle::TensorType_INT16, as_circle_tensortype(circlechef::INT16));
+}
+
+TEST(ConvertTest, as_circle_tensortype_NEG)
+{
+ EXPECT_THROW(as_circle_tensortype(static_cast<circlechef::TensorType>(99)), std::runtime_error);
+}
{
public:
GeneratedModelImpl(std::unique_ptr<flatbuffers::FlatBufferBuilder> &&builder)
- : _builder{std::move(builder)}
+ : _builder{std::move(builder)}
{
// DO NOTHING
}
static DataChefRegistry fp32;
static DataChefRegistry u8;
static DataChefRegistry boolean;
+ static DataChefRegistry s16;
switch (type)
{
return u8;
case circlechef::BOOL:
return boolean;
+ case circlechef::INT16:
+ return s16;
default:
break;
}
// Initialize Data Chef Registry
#define DATA_CHEF(TYPE, NAME, FACTORY_CLASS) \
data_chef_registry(::circlechef::TYPE) \
- .add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
+ .add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
#include <souschef/DataChef.def>
#undef DATA_CHEF
// Create FlatBufferBuilder
//
auto flatbuffer_builder =
- std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
+ std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
// Operand-related
std::vector<flatbuffers::Offset<::circle::Buffer>> buffer_vec;
// Create OperatorCode with Builtin Operator
std::map<circle::BuiltinOperator, int32_t> builtin_code_map =
- gather_builtincode_map(model_recipe);
+ gather_builtincode_map(model_recipe);
for (auto const &opcode : builtin_code_map)
{
circle::OperatorCodeBuilder code_builder{*flatbuffer_builder};
// Return "GenerateModel"
return GeneratedModel{
- std::unique_ptr<GeneratedModelImpl>(new GeneratedModelImpl(std::move(flatbuffer_builder)))};
+ std::unique_ptr<GeneratedModelImpl>(new GeneratedModelImpl(std::move(flatbuffer_builder)))};
}
} // namespace circlechef
circle::BCQFullyConnectedOptionsBuilder bcq_fully_connected_options_builder{fbb};
bcq_fully_connected_options_builder.add_weights_hidden_size(
- operation.bcq_fully_connected_options().weights_hidden_size());
+ operation.bcq_fully_connected_options().weights_hidden_size());
bcq_fully_connected_options_builder.add_fused_activation_function(
- as_circle_activation(operation.bcq_fully_connected_options().activation()));
+ as_circle_activation(operation.bcq_fully_connected_options().activation()));
return bcq_fully_connected_options_builder.Finish().Union();
}
circle::BCQGatherOptionsBuilder bcq_gather_options_builder{fbb};
bcq_gather_options_builder.add_input_hidden_size(
- operation.bcq_gather_options().input_hidden_size());
+ operation.bcq_gather_options().input_hidden_size());
bcq_gather_options_builder.add_axis(operation.bcq_gather_options().axis());
return bcq_gather_options_builder.Finish().Union();
circle::BatchMatMulOptionsBuilder batch_matmul_options_options_builder{fbb};
batch_matmul_options_options_builder.add_adjoint_lhs(
- operation.batch_matmul_options().adjoint_lhs());
+ operation.batch_matmul_options().adjoint_lhs());
batch_matmul_options_options_builder.add_adjoint_rhs(
- operation.batch_matmul_options().adjoint_rhs());
+ operation.batch_matmul_options().adjoint_rhs());
return batch_matmul_options_options_builder.Finish().Union();
}
UINT8 = 3;
INT64 = 4;
BOOL = 6;
+ INT16 = 7;
}
message TensorShape {
--- /dev/null
+operand {
+ name: "ifm1"
+ type: INT16
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "constant"
+ type: INT16
+ shape { dim: 1 dim: 4 dim: 3 dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "3.0"
+ arg: "10.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: INT16
+ shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+}
+operation {
+ type: "BatchMatMul"
+ input: "ifm1"
+ input: "constant"
+ output: "ofm"
+ batch_matmul_options {
+ adjoint_lhs: false
+ adjoint_rhs: false
+ }
+}
+input: "ifm1"
+output: "ofm"
add_executable(circlechef Driver.cpp)
target_link_libraries(circlechef circlechef_core)
target_link_libraries(circlechef safemain)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(circlechef_test Driver.test.cpp Driver.cpp)
+target_link_libraries(circlechef_test circlechef_core)
#include <iostream>
-int entry(int argc, char **argv)
+int entry_stream(std::istream &is)
{
int32_t model_version = 1;
// Read a model recipe from standard input
{
- google::protobuf::io::IstreamInputStream iis{&std::cin};
+ google::protobuf::io::IstreamInputStream iis{&is};
if (!google::protobuf::TextFormat::Parse(&iis, &model_recipe))
{
std::cerr << "ERROR: Failed to parse recipe" << std::endl;
return 0;
}
+
+int entry(int, char **)
+{
+ // forward to entry_stream
+ return entry_stream(std::cin);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+// entry function to test from Driver.cpp
+int entry_stream(std::istream &is);
+
+TEST(CircleChefDriverTest, entry_empty_NEG)
+{
+ std::istringstream empty_input("");
+
+ ASSERT_EQ(0, entry_stream(empty_input));
+}
+
+TEST(CircleChefDriverTest, entry_invalid_NEG)
+{
+ std::istringstream invalid_input("invalid: input");
+
+ ASSERT_NE(0, entry_stream(invalid_input));
+}
+
+TEST(CircleChefDriverTest, entry_invalid_version_NEG)
+{
+ std::istringstream invalid_version_input("version: 9999");
+
+ ASSERT_NE(0, entry_stream(invalid_version_input));
+}
{
arser::Arser arser;
arser.add_argument("recipe")
- .type(arser::DataType::STR)
- .help("Source recipe file path to convert");
+ .type(arser::DataType::STR)
+ .help("Source recipe file path to convert");
arser.add_argument("circle").type(arser::DataType::STR).help("Target circle file path");
try
{
arser::Arser arser;
arser.add_argument("circle")
- .type(arser::DataType::STR)
- .help("Source circle file path to convert");
+ .type(arser::DataType::STR)
+ .help("Source circle file path to convert");
arser.add_argument("recipe").type(arser::DataType::STR).help("Target recipe file path");
try
- mio-circle
- safemain
-- stdex
- FlatBuffers
#include "Read.h"
#include "OpPrinter.h"
+#include "MetadataPrinter.h"
#include <ostream>
auto opcodes = reader.opcodes();
auto buffers = reader.buffers();
+ auto metadata = reader.metadata();
// dump operator_codes
os << "Operator Codes: [order] OpCodeName (OpCode Enum)" << std::endl;
}
os << std::endl;
+ // dump metadata
+ if (metadata != nullptr)
+ {
+ os << "metadata : B(index) name" << std::endl;
+ for (uint32_t i = 0; i < metadata->Length(); ++i)
+ {
+ const auto buff_id = metadata->Get(i)->buffer();
+ const auto metadata_name = metadata->Get(i)->name()->str();
+ os << "B(" << buff_id << ") " << metadata_name << std::endl;
+
+ const uint8_t *buff_data;
+ reader.buffer_info(buff_id, &buff_data);
+ if (auto meta_prn = MetadataPrinterRegistry::get().lookup(metadata_name))
+ {
+ meta_prn->print(buff_data, os);
+ }
+ }
+ os << std::endl;
+ }
+
for (uint32_t sg = 0; sg < num_subgraph; ++sg)
{
reader.select_subgraph(sg);
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MetadataPrinter.h"
+
+#include <cassert>
+#include <string>
+#include <vector>
+
+namespace circledump
+{
+
+class SourceTablePrinter : public MetadataPrinter
+{
+public:
+ /**
+   * The source table consists of the following parts:
+ * - [ entry_number : uint32_t ]
+ * - [ id : uint32_t ][ length : uint32_t ][ data : 'length' Bytes ] * entry_number
+ */
+ virtual void print(const uint8_t *buffer, std::ostream &os) const override
+ {
+ if (buffer)
+ {
+ os << " [node_id : node_name]" << std::endl;
+ auto cur = buffer;
+ // entry number
+ const uint32_t num = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ for (uint32_t entry = 0; entry < num; entry++)
+ {
+ // id
+ const uint32_t node_id = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ // length
+ const uint32_t len = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ assert(len != 0);
+ // data
+ // non-empty 'data' has trailing '\0'. Let's exclude it.
+ std::string node_name = std::string(cur, cur + len - 1);
+ cur += len;
+
+ // print
+ os << " [" << node_id << " : " << node_name << "]" << std::endl;
+ }
+ }
+ }
+};
+
+class OpTablePrinter : public MetadataPrinter
+{
+public:
+ /**
+   * The op table consists of the following parts:
+ * - [ entry_number : uint32_t ]
+ * - [ id : uint32_t ][ length : uint32_t ][ origin_ids : length * uint32_t ] * entry_number
+ */
+ virtual void print(const uint8_t *buffer, std::ostream &os) const override
+ {
+ if (buffer)
+ {
+ os << " [node_id : origin_ids]" << std::endl;
+ auto cur = buffer;
+ // entry number
+ const uint32_t num = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ for (uint32_t entry = 0; entry < num; entry++)
+ {
+ // id
+ const uint32_t node_id = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ // length
+ const uint32_t len = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ assert(len != 0);
+ // origin_ids
+ std::vector<uint32_t> origin_ids;
+ for (uint32_t o = 0; o < len; o++)
+ {
+ origin_ids.push_back(*reinterpret_cast<const uint32_t *>(cur));
+ cur += sizeof(uint32_t);
+ }
+
+ // print
+ os << " [" << node_id << " : ";
+ uint32_t i = 0;
+ for (const auto &id : origin_ids)
+ {
+ if (i++)
+ os << ", ";
+ os << id;
+ }
+ os << "]" << std::endl;
+ }
+ }
+ }
+};
+
+MetadataPrinterRegistry::MetadataPrinterRegistry()
+{
+ _metadata_map["ONE_source_table"] = std::make_unique<SourceTablePrinter>();
+ _metadata_map["ONE_op_table"] = std::make_unique<OpTablePrinter>();
+}
+
+} // namespace circledump
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLEDUMP_METADATA_PRINTER_H__
+#define __CIRCLEDUMP_METADATA_PRINTER_H__
+
+#include <ostream>
+#include <string>
+#include <map>
+#include <memory>
+
+namespace circledump
+{
+
+class MetadataPrinter
+{
+public:
+ virtual void print(const uint8_t * /* buffer */, std::ostream &) const = 0;
+};
+
+class MetadataPrinterRegistry
+{
+public:
+ MetadataPrinterRegistry();
+
+public:
+ const MetadataPrinter *lookup(std::string op) const
+ {
+ if (_metadata_map.find(op) == _metadata_map.end())
+ return nullptr;
+
+ return _metadata_map.at(op).get();
+ }
+
+public:
+ static MetadataPrinterRegistry &get()
+ {
+ static MetadataPrinterRegistry me;
+ return me;
+ }
+
+private:
+ std::map<std::string /* metadata name */, std::unique_ptr<MetadataPrinter>> _metadata_map;
+};
+
+} // namespace circledump
+
+#endif // __CIRCLEDUMP_METADATA_PRINTER_H__
}
};
+class BidirectionalSequenceLSTMPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_BidirectionalSequenceLSTMOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << "cell_clip(" << params->cell_clip() << ") ";
+ os << "proj_clip(" << params->proj_clip() << ") ";
+ os << "time_major(" << params->time_major() << ") ";
+ os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") ";
+ os << "merge_outputs(" << params->merge_outputs() << ") ";
+ os << std::endl;
+ }
+ }
+};
+
class CastPrinter : public OpPrinter
{
public:
os << "Stride.H(" << conv_params->stride_h() << ") ";
os << "DepthMultiplier(" << conv_params->depth_multiplier() << ") ";
os << "Dilation.W(" << conv_params->dilation_w_factor() << ") ";
- os << "Dilation.H(" << conv_params->dilation_h_factor() << ")";
+ os << "Dilation.H(" << conv_params->dilation_h_factor() << ") ";
os << "Activation("
<< EnumNameActivationFunctionType(conv_params->fused_activation_function()) << ") ";
os << std::endl;
}
};
+class FakeQuantPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_FakeQuantOptions())
+ {
+ os << " ";
+ os << "Min(" << params->min() << ") ";
+ os << "Max(" << params->max() << ") ";
+ os << "NumBits(" << params->num_bits() << ") ";
+ os << std::boolalpha;
+ os << "NarrowRange(" << params->narrow_range() << ") ";
+ os << std::noboolalpha;
+ os << std::endl;
+ }
+ }
+};
+
class FullyConnectedPrinter : public OpPrinter
{
public:
_op_map[circle::BuiltinOperator_ARG_MIN] = make_unique<ArgMinPrinter>();
_op_map[circle::BuiltinOperator_AVERAGE_POOL_2D] = make_unique<Pool2DPrinter>();
_op_map[circle::BuiltinOperator_BATCH_MATMUL] = make_unique<BatchMatMulPrinter>();
+ _op_map[circle::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM] =
+ make_unique<BidirectionalSequenceLSTMPrinter>();
_op_map[circle::BuiltinOperator_CAST] = make_unique<CastPrinter>();
// There is no Option for CEIL
_op_map[circle::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationPrinter>();
_op_map[circle::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique<DepthwiseConv2DPrinter>();
// There is no Option for DEQUANTIZE
_op_map[circle::BuiltinOperator_DIV] = make_unique<DivPrinter>();
+ _op_map[circle::BuiltinOperator_FAKE_QUANT] = make_unique<FakeQuantPrinter>();
// There is no Option for FLOOR
// There is no Option for FLOOR_MOD
_op_map[circle::BuiltinOperator_FULLY_CONNECTED] = make_unique<FullyConnectedPrinter>();
_op_map[circle::BuiltinOperator_L2_POOL_2D] = make_unique<Pool2DPrinter>();
_op_map[circle::BuiltinOperator_LEAKY_RELU] = make_unique<LeakyReluPrinter>();
_op_map[circle::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION] =
- make_unique<LocalResponseNormalizationPrinter>();
+ make_unique<LocalResponseNormalizationPrinter>();
// There is no Option for LOG
// There is no Option for LOGISTIC
// There is no Option for LOG_SOFTMAX
_op_map[circle::BuiltinOperator_RESHAPE] = make_unique<ReshapePrinter>();
_op_map[circle::BuiltinOperator_RESIZE_BILINEAR] = make_unique<ResizeBilinearPrinter>();
_op_map[circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR] =
- make_unique<ResizeNearestNeighborPrinter>();
+ make_unique<ResizeNearestNeighborPrinter>();
_op_map[circle::BuiltinOperator_REVERSE_SEQUENCE] = make_unique<ReverseSequencePrinter>();
// There is no Option for ROUND
// There is no Option for SELECT
_op_map[circle::BuiltinOperator_TRANSPOSE_CONV] = make_unique<TransposeConvPrinter>();
// There is no Option for TOPK_V2
_op_map[circle::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM] =
- make_unique<UnidirectionalSequenceLSTMPrinter>();
+ make_unique<UnidirectionalSequenceLSTMPrinter>();
_op_map[circle::BuiltinOperator_UNIQUE] = make_unique<UniquePrinter>();
_op_map[circle::BuiltinOperator_WHILE] = make_unique<WhilePrinter>();
_op_map[circle::BuiltinOperator_CUSTOM] = make_unique<CustomOpPrinter>();
_version = model->version();
_subgraphs = model->subgraphs();
_buffers = model->buffers();
+ _metadata = model->metadata();
auto opcodes = model->operator_codes();
for (const ::circle::OperatorCode *opcode : *opcodes)
using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
+ using CircleMetadata_t = flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>;
public:
Reader(const circle::Model *model);
const std::vector<int32_t> &inputs() const { return _inputs; }
const std::vector<int32_t> &outputs() const { return _outputs; }
const circle::DataFormat &data_format() const { return _data_format; }
+ const CircleMetadata_t *metadata() const { return _metadata; }
uint32_t num_subgraph() const { return _subgraphs->Length(); }
const CircleBuffers_t *_buffers{nullptr};
const CircleTensors_t *_tensors{nullptr};
const CircleOperators_t *_operators{nullptr};
+ const CircleMetadata_t *_metadata{nullptr};
uint32_t _subgraph_index;
std::string _subgraph_name;
GTest_AddTest(cli_test ${TESTS})
target_link_libraries(cli_test cli)
-target_link_libraries(cli_test stdex)
#include "cli/App.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
cli::App app("test");
std::string args;
- app.insert("record", stdex::make_unique<RecordCommand>(3, args));
+ app.insert("record", std::make_unique<RecordCommand>(3, args));
const char *argv[] = {"record", "hello", "world"};
# NOTE Some coco_core PUBLIC headers include angkor headers
target_link_libraries(coco_core PUBLIC angkor)
target_link_libraries(coco_core PRIVATE pepper_assert)
-target_link_libraries(coco_core PRIVATE stdex)
# Let's apply nncc common compile options
# NOTE This will enable strict compilation (warnings as error).
# Please refer to top-level CMakeLists.txt for details
GTest_AddTest(coco_core_test ${TESTS})
target_link_libraries(coco_core_test coco_core)
-target_link_libraries(coco_core_test stdex)
{
public:
FeatureShape(uint32_t depth, uint32_t height, uint32_t width)
- : Shape{depth, height, width}, _batch{1}
+ : Shape{depth, height, width}, _batch{1}
{
// DO NOTHING
}
FeatureShape(uint32_t batch, uint32_t depth, uint32_t height, uint32_t width)
- : Shape{depth, height, width}, _batch{batch}
+ : Shape{depth, height, width}, _batch{batch}
{
// DO NOTHING
}
/**
 * @brief Return the associated instruction if it exists.
- */
+ */
struct Locatable
{
virtual ~Locatable() = default;
const Sqrt *asSqrt(void) const override { return this; }
};
-} // namesapce coco
+} // namespace coco
#endif // __COCO_IR_OPS_H__
public:
Padding2D(uint32_t top, uint32_t bottom, uint32_t left, uint32_t right)
- : _top{top}, _bottom{bottom}, _left{left}, _right{right}
+ : _top{top}, _bottom{bottom}, _left{left}, _right{right}
{
// DO NOTHING
}
struct Object
{
};
-}
+} // namespace
TEST(ADT_PTR_LIST, ctor)
{
void free(Object *o) { release(o); }
};
-}
+} // namespace
TEST(ADT_PTR_MANAGER, usecase)
{
#include "coco/IR/BagManager.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace coco
{
Bag *BagManager::create(uint32_t size)
{
- auto bag = stdex::make_unique<Bag>(size);
+ auto bag = std::make_unique<Bag>(size);
modulize(bag.get());
return take(std::move(bag));
}
#include "coco/IR/BlockManager.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace coco
Block *BlockManager::create(void)
{
- auto blk = stdex::make_unique<Block>();
+ auto blk = std::make_unique<Block>();
modulize(blk.get());
return take(std::move(blk));
}
#include <vector>
#include <memory>
-#include <stdex/Memory.h>
-
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
#include "coco/IR/FeatureObject.h"
-#include <stdex/Memory.h>
+#include <memory>
#include "Producer.mock.h"
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
#include "coco/IR/InputManager.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace coco
{
Input *InputManager::create(const nncc::core::ADT::tensor::Shape &shape)
{
- auto input = stdex::make_unique<Input>(shape);
+ auto input = std::make_unique<Input>(shape);
modulize(input.get());
return take(std::move(input));
}
#include "coco/IR/Module.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
#include "coco/IR/FeatureObject.h"
#include "coco/IR/KernelObject.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
-using stdex::make_unique;
+using std::make_unique;
namespace coco
{
#include "coco/IR/OpManager.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <queue>
#include <set>
-using stdex::make_unique;
+using std::make_unique;
namespace coco
{
#include <vector>
#include <memory>
-#include <stdex/Memory.h>
-
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
/**
* Section: Add Op
#include "coco/IR/OutputManager.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace coco
{
Output *OutputManager::create(const nncc::core::ADT::tensor::Shape &shape)
{
- auto output = stdex::make_unique<Output>(shape);
+ auto output = std::make_unique<Output>(shape);
modulize(output.get());
return take(std::move(output));
}
#include "coco/IR/Part.h"
#include "coco/IR/Op.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
#include "Consumer.mock.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
add_library(coco_generic SHARED ${SOURCES})
target_include_directories(coco_generic PUBLIC include)
target_link_libraries(coco_generic PUBLIC coco_core)
-target_link_libraries(coco_generic PRIVATE stdex)
target_link_libraries(coco_generic PRIVATE nncc_common)
if(NOT ENABLE_TEST)
GTest_AddTest(coco_generic_test ${TESTS})
target_link_libraries(coco_generic_test coco_generic)
-# stdex is a PRIVATE dependency of coco_generic, and thus is not linked to coco_generic_test
-# even though coco_generic_test is linked to coco_generic
-target_link_libraries(coco_generic_test stdex)
#include <nncc/core/ADT/kernel/NCHWLayout.h>
#include <nncc/core/ADT/kernel/Overlay.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
using namespace nncc::core::ADT;
-using stdex::make_unique;
+using std::make_unique;
namespace
{
private:
std::map<const coco::Bag *, std::unique_ptr<std::vector<uint8_t>>> _data;
};
-}
+} // namespace
namespace
{
if(DEFINED RULE_SOURCE_PATH)
# Copy .rule
add_custom_command(OUTPUT ${RULE_BINARY_PATH}
- COMMAND ${CMAKE_COMMAND} -E copy "${RULE_SOURCE_PATH}" "${RULE_BINARY_PATH}"
- DEPENDS ${RULE_SOURCE_PATH}
- COMMENT "Generate ${RULE_FILE}"
+ COMMAND ${CMAKE_COMMAND} -E copy "${RULE_SOURCE_PATH}" "${RULE_BINARY_PATH}"
+ DEPENDS ${RULE_SOURCE_PATH}
+ COMMENT "Generate ${RULE_FILE}"
)
list(APPEND TEST_DEPS ${RULE_BINARY_PATH})
endif()
list(APPEND TEST_DEPS ${TFLITE_OUTPUT_PATH})
if(NOT DEFINED NO_CIRCLIZE_${RECIPE})
- # Generate .circle
- add_custom_command(OUTPUT ${CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH} ${CIRCLE_OUTPUT_PATH}
- DEPENDS $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH}
- COMMENT "Generate ${CIRCLE_FILE}"
- )
- set(MODEL_FORMAT "circle")
- list(APPEND TEST_DEPS ${CIRCLE_OUTPUT_PATH})
+ # Generate .circle
+ add_custom_command(OUTPUT ${CIRCLE_OUTPUT_PATH}
+ COMMAND $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH} ${CIRCLE_OUTPUT_PATH}
+ DEPENDS $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH}
+ COMMENT "Generate ${CIRCLE_FILE}"
+ )
+ set(MODEL_FORMAT "circle")
+ list(APPEND TEST_DEPS ${CIRCLE_OUTPUT_PATH})
endif()
else()
# Generate .circle
add_custom_command(OUTPUT ${CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH} ${CIRCLE_OUTPUT_PATH}
- DEPENDS $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH}
- COMMENT "Generate ${CIRCLE_FILE}"
+ COMMAND $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH} ${CIRCLE_OUTPUT_PATH}
+ DEPENDS $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH}
+ COMMENT "Generate ${CIRCLE_FILE}"
)
list(APPEND TEST_DEPS ${CIRCLE_OUTPUT_PATH})
endif()
if(NOT DEFINED NO_OPTIMIZE_${RECIPE})
# Generate optimized .circle
add_custom_command(OUTPUT ${OPT_CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:circle2circle> --all ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH}
+ COMMAND $<TARGET_FILE:circle2circle> --O1 ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH}
DEPENDS $<TARGET_FILE:circle2circle> ${CIRCLE_OUTPUT_PATH}
COMMENT "Generate ${OPT_CIRCLE_FILE}"
)
set(MODEL_FILE "${RECIPE}${OPT_FORMAT}.${MODEL_FORMAT}")
set(MODEL_PATH "${CMAKE_CURRENT_BINARY_DIR}/${MODEL_FILE}")
set(NNPKG_FILE "${RECIPE}${OPT_FORMAT}")
- set(NNPKG_PATH "${CMAKE_CURRENT_BINARY_DIR}/${NNPKG_FILE}")
+ set(NNPKG_DIR "${CMAKE_CURRENT_BINARY_DIR}/${NNPKG_FILE}")
+ set(NNPKG_MODEL "${NNPKG_DIR}/${MODEL_FILE}")
+
+ # Generate nnpackage directory
+ add_custom_command(OUTPUT ${NNPKG_DIR}
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${NNPKG_DIR}
+ DEPENDS ${MODEL_PATH}
+ COMMENT "Generate ${RECIPE} nnpackage directory"
+ )
+ list(APPEND TEST_DEPS ${NNPKG_DIR})
- add_custom_command(OUTPUT ${NNPKG_PATH}
+ add_custom_command(OUTPUT ${NNPKG_MODEL}
COMMAND ${MODEL2NNPKG} ${MODEL_PATH}
- DEPENDS ${MODEL2NNPKG} ${MODEL_PATH}
+ DEPENDS ${MODEL2NNPKG} ${MODEL_PATH} ${NNPKG_DIR}
COMMENT "Generate ${RECIPE} nnpackage"
)
- list(APPEND TEST_DEPS ${NNPKG_PATH})
-
- set(INPUT_HDF5_FILE "${RECIPE}${OPT_FORMAT}.input.h5")
- set(INPUT_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_HDF5_FILE}")
-
- set(EXPECTED_HDF5_FILE "${RECIPE}${OPT_FORMAT}.expected.h5")
- set(EXPECTED_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${EXPECTED_HDF5_FILE}")
+ list(APPEND TEST_DEPS ${NNPKG_MODEL})
if(NOT DEFINED NO_TCGEN_${RECIPE})
- # Generate input.h5, expected.h5
- add_custom_command(OUTPUT ${INPUT_BIN_PATH} ${EXPECTED_BIN_PATH}
- COMMAND $<TARGET_FILE:testDataGenerator> ${MODEL_FILE}
- DEPENDS $<TARGET_FILE:testDataGenerator> ${MODEL_FILE}
- COMMENT "Generate ${INPUT_BIN_PATH} and ${EXPECTED_BIN_PATH}"
- )
-
# Generate test directory
- set(TC_DIRECTORY "${NNPKG_PATH}/metadata/tc")
+ set(TC_DIRECTORY "${NNPKG_DIR}/metadata/tc")
add_custom_command(OUTPUT ${TC_DIRECTORY}
COMMAND ${CMAKE_COMMAND} -E make_directory ${TC_DIRECTORY}
- DEPENDS ${NNPKG_PATH}
+ DEPENDS ${NNPKG_DIR}
COMMENT "Generate ${RECIPE} nnpackage test directory"
)
+ list(APPEND TEST_DEPS ${TC_DIRECTORY})
- # Move input hdf5 file to test directory
- set(INPUT_NNPKG_PATH "${TC_DIRECTORY}/input.h5")
- add_custom_command(OUTPUT ${INPUT_NNPKG_PATH}
- COMMAND ${CMAKE_COMMAND} -E rename ${INPUT_BIN_PATH} ${INPUT_NNPKG_PATH}
- DEPENDS ${INPUT_BIN_PATH} ${TC_DIRECTORY}
- COMMENT "Move ${INPUT_HDF5_FILE} to nnpackage"
- )
-
- # Move expected hdf5 file to test directory
- set(EXPECTED_NNPKG_PATH "${TC_DIRECTORY}/expected.h5")
- add_custom_command(OUTPUT ${EXPECTED_NNPKG_PATH}
- COMMAND ${CMAKE_COMMAND} -E rename ${EXPECTED_BIN_PATH} ${EXPECTED_NNPKG_PATH}
- DEPENDS ${EXPECTED_BIN_PATH} ${TC_DIRECTORY}
- COMMENT "Move ${EXPECTED_HDF5_FILE} to nnpackage"
+ # Generate input.h5, expected.h5
+ set(INPUT_HDF5_FILE "${TC_DIRECTORY}/input.h5")
+ set(EXPECTED_HDF5_FILE "${TC_DIRECTORY}/expected.h5")
+ add_custom_command(OUTPUT ${INPUT_HDF5_FILE} ${EXPECTED_HDF5_FILE}
+ COMMAND $<TARGET_FILE:testDataGenerator> --input_data ${INPUT_HDF5_FILE} --expected_data ${EXPECTED_HDF5_FILE} ${MODEL_FILE}
+ DEPENDS $<TARGET_FILE:testDataGenerator> ${MODEL_FILE} ${TC_DIRECTORY}
+ COMMENT "Generate ${INPUT_HDF5_FILE} and ${EXPECTED_HDF5_FILE}"
)
- list(APPEND TEST_DEPS ${TC_DIRECTORY} ${INPUT_BIN_PATH} ${EXPECTED_BIN_PATH}
- ${INPUT_NNPKG_PATH} ${EXPECTED_NNPKG_PATH})
+ list(APPEND TEST_DEPS ${INPUT_HDF5_FILE} ${EXPECTED_HDF5_FILE})
endif()
endforeach()
tcgenerate(BatchMatMulV2_000)
tcgenerate(BatchMatMulV2_001)
tcgenerate(BatchToSpaceND_000)
+tcgenerate(BroadcastTo_000) # luci-interpreter doesn't support custom operator
tcgenerate(Cast_000)
tcgenerate(Cast_001)
tcgenerate(Ceil_000)
tcgenerate(ExpandDims_001)
tcgenerate(ExpandDims_002)
tcgenerate(ExpandDims_003)
+tcgenerate(ExpandDims_004)
+tcgenerate(FakeQuant_000) # runtime and luci-interpreter don't support it yet
tcgenerate(Fill_000)
tcgenerate(Fill_001)
tcgenerate(FloorMod_000)
tcgenerate(MaxPoolWithArgMax_000)
tcgenerate(MaxPoolWithArgMax_001)
tcgenerate(MaxPoolWithArgMax_002)
+tcgenerate(Mean_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(Mean_dynamic_001) # TestDataGenerator does not support unknown dimension
+tcgenerate(Mean_U8_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(NonMaxSuppressionV4_000)
tcgenerate(NonMaxSuppressionV4_001)
tcgenerate(NonMaxSuppressionV5_000)
tcgenerate(MirrorPad_000)
tcgenerate(Mul_U8_000)
tcgenerate(Neg_000)
+tcgenerate(Net_BroadcastTo_AddV2_001) # luci-interpreter doesn't support custom operator
tcgenerate(Net_Dangle_001)
-tcgenerate(Net_InstanceNorm_001)
-tcgenerate(Net_InstanceNorm_002)
-tcgenerate(Net_InstanceNorm_003)
tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim
tcgenerate(OneHot_000)
tcgenerate(OneHot_001)
tcgenerate(ReduceAny_001)
tcgenerate(ReduceAny_002)
tcgenerate(ReduceAny_003)
-tcgenerate(ReduceAny_dynamic_000)
-tcgenerate(ReduceAny_dynamic_001)
-tcgenerate(ReduceAny_dynamic_002)
-tcgenerate(ReduceAny_dynamic_003)
+tcgenerate(ReduceAny_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceAny_dynamic_001) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceAny_dynamic_002) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceAny_dynamic_003) # TestDataGenerator does not support unknown dimension
tcgenerate(ReduceMax_000)
-tcgenerate(ReduceMax_dynamic_000)
+tcgenerate(ReduceMax_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(ReduceMin_000)
-tcgenerate(ReduceMin_dynamic_000)
+tcgenerate(ReduceMin_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(ReduceProd_000)
tcgenerate(ReduceProd_001)
tcgenerate(ReduceProd_002)
tcgenerate(ReduceProd_003)
-tcgenerate(ReduceProd_dynamic_000)
-tcgenerate(ReduceProd_dynamic_001)
-tcgenerate(ReduceProd_dynamic_002)
-tcgenerate(ReduceProd_dynamic_003)
+tcgenerate(ReduceProd_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceProd_dynamic_001) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceProd_dynamic_002) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceProd_dynamic_003) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReLU_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReLU6_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(ReLUN1To1_000)
-tcgenerate(ReLUN1To1_dynamic_000)
+tcgenerate(ReLUN1To1_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(Reshape_003) # luci-interpreter doesn't support reshape without built-in option
tcgenerate(ReverseSequence_000)
tcgenerate(ReverseV2_000)
tcgenerate(SelectV2_002)
tcgenerate(Shape_000)
tcgenerate(Sin_000)
+tcgenerate(Slice_001) # luci-interpreter doesn't support Slice with -1
tcgenerate(SpaceToBatchND_000)
tcgenerate(SpaceToBatchND_001)
tcgenerate(SpaceToBatchND_002)
tcgenerate(SparseToDense_000)
tcgenerate(SplitV_000)
tcgenerate(Square_000)
-tcgenerate(SquaredDifference_000)
tcgenerate(Sum_000)
tcgenerate(Sum_001)
-tcgenerate(Sum_dynamic_000)
-tcgenerate(Sum_dynamic_001)
+tcgenerate(Sum_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(Sum_dynamic_001) # TestDataGenerator does not support unknown dimension
tcgenerate(Tile_000)
tcgenerate(Tile_U8_000)
tcgenerate(TopKV2_000)
uint32_t element_num(std::vector<hsize_t> &vec)
{
return static_cast<uint32_t>(
- std::accumulate(std::begin(vec), std::end(vec), 1, std::multiplies<uint32_t>()));
+ std::accumulate(std::begin(vec), std::end(vec), 1, std::multiplies<uint32_t>()));
}
H5::PredType hdf5_dtype_cast(const loco::DataType loco_dtype)
{
arser::Arser arser;
arser.add_argument("circle").type(arser::DataType::STR).help("Circle file you want to test");
+ arser.add_argument("--input_data")
+ .required(true)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Path to generate input data h5 file");
+ arser.add_argument("--expected_data")
+ .required(true)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Path to generate expected data h5 file");
arser.add_argument("--fixed_seed")
- .required(false)
- .nargs(0)
- .help("Put a fixed seed into the random number generator");
+ .required(false)
+ .nargs(0)
+ .help("Put a fixed seed into the random number generator");
try
{
}
std::string circle_file = arser.get<std::string>("circle");
- size_t last_dot_index = circle_file.find_last_of(".");
- std::string prefix = circle_file.substr(0, last_dot_index);
// load circle file
foder::FileLoader file_loader{circle_file};
 * ㄴDATA ...
*/
// create random data and dump into hdf5 file
- H5::H5File input_file{prefix + ".input.h5", H5F_ACC_TRUNC};
+ H5::H5File input_file{arser.get<std::string>("--input_data"), H5F_ACC_TRUNC};
std::unique_ptr<H5::Group> input_name_group =
- std::make_unique<H5::Group>(input_file.createGroup("name"));
+ std::make_unique<H5::Group>(input_file.createGroup("name"));
std::unique_ptr<H5::Group> input_value_group =
- std::make_unique<H5::Group>(input_file.createGroup("value"));
+ std::make_unique<H5::Group>(input_file.createGroup("value"));
- H5::H5File output_file{prefix + ".expected.h5", H5F_ACC_TRUNC};
+ H5::H5File output_file{arser.get<std::string>("--expected_data"), H5F_ACC_TRUNC};
std::unique_ptr<H5::Group> output_name_group =
- std::make_unique<H5::Group>(output_file.createGroup("name"));
+ std::make_unique<H5::Group>(output_file.createGroup("name"));
std::unique_ptr<H5::Group> output_value_group =
- std::make_unique<H5::Group>(output_file.createGroup("value"));
+ std::make_unique<H5::Group>(output_file.createGroup("value"));
std::random_device rd; // used to obtain a seed for the random number engine
uint32_t input_index = 0;
auto dataspace = std::make_unique<H5::DataSpace>(dims.size(), dims.data());
auto dtype = hdf5_dtype_cast(input_node->dtype());
auto dataset = std::make_unique<H5::DataSet>(
- input_file.createDataSet("value/" + std::to_string(input_index), dtype, *dataspace));
+ input_file.createDataSet("value/" + std::to_string(input_index), dtype, *dataspace));
auto data_size = ::element_num(dims);
auto dtype_size = loco::size(input_node->dtype());
auto dataspace = std::make_unique<H5::DataSpace>(dims.size(), dims.data());
auto dtype = hdf5_dtype_cast(output_node->dtype());
auto dataset = std::make_unique<H5::DataSet>(
- output_file.createDataSet("value/" + std::to_string(output_index), dtype, *dataspace));
+ output_file.createDataSet("value/" + std::to_string(output_index), dtype, *dataspace));
uint32_t tensor_bytesize = loco::size(output_node->dtype());
tensor_bytesize *= ::element_num(dims);
--- /dev/null
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(crew STATIC ${SOURCES})
+target_include_directories(crew PRIVATE src)
+target_include_directories(crew PUBLIC include)
+target_link_libraries(crew PRIVATE foder)
+target_link_libraries(crew PRIVATE nncc_common)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(crew_test ${TESTS})
+target_include_directories(crew_test PRIVATE src)
+target_link_libraries(crew_test nncc_common)
+target_link_libraries(crew_test crew)
--- /dev/null
+# crew
+
+_crew_ is the circle partitioning Configuration REader and Writer library.
+
+### Supported formats
+
+Currently, _crew_ supports the following formats and functionalities:
+- INI read
+- INI write
+- JSON write
+
+_crew_ supports a limited portion of the JSON and INI formats, just enough to
+access circle partition configuration files.
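+
+### Example
+
+A minimal usage sketch of the `crew::PConfig` API declared in `crew/PConfig.h`
+(the file paths below are only examples):
+
+```cpp
+#include "crew/PConfig.h"
+
+#include <fstream>
+#include <iostream>
+
+int main(void)
+{
+  crew::PConfig config;
+
+  // read a partition configuration from an INI file
+  if (!crew::read_ini("partition.ini", config))
+  {
+    std::cerr << "Failed to read partition.ini" << std::endl;
+    return 1;
+  }
+
+  // write the same configuration back as INI to stdout and as JSON to a file
+  crew::write_ini(std::cout, config);
+
+  std::ofstream json_file("partition.json");
+  crew::write_json(json_file, config);
+
+  return 0;
+}
+```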
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CREW_PCONFIG_H__
+#define __CREW_PCONFIG_H__
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace crew
+{
+
+struct Part
+{
+ std::string model_file;
+ std::vector<std::string> inputs;
+ std::vector<std::string> outputs;
+};
+
+using Parts = std::vector<Part>;
+using Source = Part;
+
+struct PConfig
+{
+ Source source;
+ Parts parts;
+};
+
+/**
+ * @brief Read config from an INI file, return false if failed
+ */
+bool read_ini(const std::string &path, PConfig &config);
+
+/**
+ * @brief Write config in INI format to the stream, return false if failed
+ */
+bool write_ini(std::ostream &os, const PConfig &config);
+
+/**
+ * @brief Write config in JSON format to the stream, return false if failed
+ */
+bool write_json(std::ostream &os, const PConfig &config);
+
+} // namespace crew
+
+#endif // __CREW_PCONFIG_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CREW_PCONFIG_INI_H__
+#define __CREW_PCONFIG_INI_H__
+
+#include <iostream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace crew
+{
+
+using KeyValues = std::unordered_map<std::string, std::string>;
+
+struct Section
+{
+ std::string name;
+ KeyValues items;
+};
+
+using Sections = std::vector<Section>;
+
+/**
+ * @brief Read Config INI from a memory buffer of given length and return Sections
+ */
+Sections read_ini(const char *data, size_t length);
+/**
+ * @brief Reads Config INI from file and return Sections
+ */
+Sections read_ini(const std::string &path);
+
+/**
+ * @brief Write Config INI with Sections to ostream
+ */
+void write_ini(std::ostream &os, const Sections &sections);
+/**
+ * @brief Write Config INI with Sections to file, throw if failed
+ */
+void write_ini(const std::string &path, const Sections &sections);
+
+/**
+ * @brief Find a section with name, empty section if not found
+ */
+Section find(const Sections &sections, const std::string &name);
+
+/**
+ * @brief Find a key-value pair from key and return value, empty string if not found
+ */
+std::string find(const Section &section, const std::string &key);
+
+} // namespace crew
+
+#endif // __CREW_PCONFIG_INI_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CREW_PCONFIG_INI_DUMP_H__
+#define __CREW_PCONFIG_INI_DUMP_H__
+
+#include "PConfigIni.h"
+
+#include <iostream>
+
+namespace crew
+{
+
+void dump(std::ostream &os, const Sections &sections);
+
+} // namespace crew
+
+std::ostream &operator<<(std::ostream &os, const crew::Sections &sections);
+
+#endif // __CREW_PCONFIG_INI_DUMP_H__
--- /dev/null
+require("foder")
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfig.h"
+#include "crew/PConfigIni.h"
+
+#include "PConfigJson.h"
+
+#include <utility>
+
+namespace
+{
+
+bool read_part(const crew::Section &section, crew::Part &part)
+{
+  // read the model file of this part
+  part.model_file = crew::find(section, "file");
+ if (part.model_file.empty())
+ return false;
+
+  // read inputs of this part
+ for (int32_t i = 1;; ++i)
+ {
+ std::string item = "i" + std::to_string(i);
+ std::string input = crew::find(section, item);
+ if (input.empty())
+ break;
+
+ part.inputs.push_back(input);
+ }
+  // read outputs of this part
+ for (int32_t i = 1;; ++i)
+ {
+ std::string item = "o" + std::to_string(i);
+ std::string output = crew::find(section, item);
+ if (output.empty())
+ break;
+
+ part.outputs.push_back(output);
+ }
+ return true;
+}
+
+} // namespace
+
+namespace
+{
+
+void write_part(crew::JsonExport &je, const crew::Part &part)
+{
+ std::vector<std::string> graph_inputs;
+ std::vector<std::string> graph_outputs;
+
+ for (auto &input : part.inputs)
+ {
+ graph_inputs.push_back(input);
+ }
+ for (auto &output : part.outputs)
+ {
+ graph_outputs.push_back(output);
+ }
+
+ je.key_val("file", part.model_file.c_str(), true);
+ je.key_val("inputs", graph_inputs, true);
+ je.key_val("outputs", graph_outputs, false);
+}
+
+void write_parts(crew::JsonExport &je, const crew::Parts &parts)
+{
+ uint32_t idx = 1;
+ uint32_t size = parts.size();
+ for (auto &part : parts)
+ {
+ je.open_brace();
+ write_part(je, part);
+ je.close_brace(idx < size);
+ idx++;
+ }
+}
+
+} // namespace
+
+namespace
+{
+
+void part_to_section_io(const crew::Part &part, crew::Section &section)
+{
+ uint32_t idx = 1;
+ for (auto &input : part.inputs)
+ {
+ std::string key = "i" + std::to_string(idx);
+ section.items.emplace(key, input);
+ idx++;
+ }
+ idx = 1;
+ for (auto &output : part.outputs)
+ {
+ std::string key = "o" + std::to_string(idx);
+ section.items.emplace(key, output);
+ idx++;
+ }
+}
+
+} // namespace
+
+namespace crew
+{
+
+bool read_ini(const std::string &path, PConfig &pconfig)
+{
+ auto sections = crew::read_ini(path);
+
+ auto section_source = crew::find(sections, "source");
+ auto section_models = crew::find(sections, "models");
+ if (section_source.name != "source" || section_models.name != "models")
+ {
+ return false;
+ }
+
+ if (!read_part(section_source, pconfig.source))
+ {
+ return false;
+ }
+
+ // get models list
+ std::vector<std::string> models;
+ for (int32_t i = 1;; ++i)
+ {
+ std::string item = "m" + std::to_string(i);
+ std::string model = crew::find(section_models, item);
+ if (model.empty())
+ break;
+
+ models.push_back(model);
+ }
+
+ for (auto &model : models)
+ {
+ auto section_model = crew::find(sections, model);
+
+ Part part;
+ if (!read_part(section_model, part))
+ {
+ return false;
+ }
+ pconfig.parts.push_back(part);
+ }
+
+ return true;
+}
+
+bool write_ini(std::ostream &os, const PConfig &pconfig)
+{
+ crew::Sections sections;
+
+ // make [source]
+ crew::Section section_source;
+ section_source.name = "source";
+ section_source.items["file"] = pconfig.source.model_file;
+ part_to_section_io(pconfig.source, section_source);
+ sections.push_back(section_source);
+
+ // make [models]
+ crew::Section section_models;
+ section_models.name = "models";
+ uint32_t idx = 1;
+ for (auto &part : pconfig.parts)
+ {
+ std::string key = "m" + std::to_string(idx);
+ section_models.items[key] = part.model_file;
+ idx++;
+ }
+ sections.push_back(section_models);
+
+ for (auto &part : pconfig.parts)
+ {
+ // make circle model section
+ crew::Section section_model;
+ section_model.name = part.model_file;
+ section_model.items["file"] = part.model_file;
+ part_to_section_io(part, section_model);
+ sections.push_back(section_model);
+ }
+
+ write_ini(os, sections);
+
+ return true;
+}
+
+bool write_json(std::ostream &os, const PConfig &pconfig)
+{
+ crew::JsonExport je(os);
+
+ je.open_brace();
+ {
+ je.open_brace("source");
+ write_part(je, pconfig.source);
+ je.close_brace(true);
+ }
+ {
+ je.open_bracket("parts");
+ write_parts(je, pconfig.parts);
+ je.close_bracket(false);
+ }
+ je.close_brace(false);
+
+ return true;
+}
+
+} // namespace crew
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfigIni.h"
+#include "crew/PConfigIniDump.h"
+
+#include <foder/FileLoader.h>
+
+#include <cassert>
+#include <cstring>
+#include <fstream>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+namespace crew
+{
+
+Sections read_ini(const char *data, size_t length)
+{
+ assert(data != nullptr);
+ assert(length > 0);
+
+  // NOTE allocate with the final size (not just reserve) so that writing
+  //      through data() stays within the vector's actual elements
+  auto buffer = std::vector<char>(length + 1);
+  char *pbuffer = buffer.data();
+  memcpy(pbuffer, data, length);
+  // add null at end to be sure
+  *(pbuffer + length) = 0;
+
+ Sections sections;
+ Section section;
+
+ std::string string_line;
+
+ const char *delim = "\r\n";
+ const char *one_line = std::strtok(pbuffer, delim);
+ while (one_line != nullptr)
+ {
+ if (*one_line == '[')
+ {
+ if (!section.name.empty())
+ {
+ sections.push_back(section);
+ }
+ section.name.clear();
+ section.items.clear();
+
+ string_line = one_line + 1;
+ auto pos = string_line.find(']');
+ assert(pos != std::string::npos);
+ if (pos != std::string::npos)
+ {
+ section.name = string_line.substr(0, pos);
+ }
+ }
+ else if (*one_line == '#' || *one_line == ';')
+ {
+ // Comment line, do nothing
+ }
+    else if (*one_line) // string length is not 0
+ {
+ if (section.name.empty())
+ throw std::runtime_error("Invalid INI file");
+
+ string_line = one_line;
+ auto pos = string_line.find('=');
+ assert(pos != std::string::npos);
+ if (pos != std::string::npos)
+ {
+ auto key = string_line.substr(0, pos);
+ auto val = string_line.substr(pos + 1);
+ section.items.emplace(key, val);
+ }
+ }
+
+ one_line = std::strtok(nullptr, delim);
+ }
+ if (!section.name.empty())
+ {
+ sections.push_back(section);
+ }
+
+ return sections;
+}
+
+Sections read_ini(const std::string &path)
+{
+ foder::FileLoader file_loader{path};
+ // load will throw if error while opening
+ auto ini_data = file_loader.load();
+
+ return read_ini(ini_data.data(), ini_data.size());
+}
+
+void write_ini(std::ostream &os, const Sections &sections)
+{
+ std::stringstream ss;
+
+ ss << sections;
+
+ std::string strss = ss.str();
+
+ os.write(strss.c_str(), strss.length());
+}
+
+void write_ini(const std::string &filepath, const Sections &sections)
+{
+ std::ofstream fs(filepath.c_str(), std::ofstream::binary | std::ofstream::trunc);
+ if (not fs.good())
+ {
+ std::string msg = "Failed to create file: " + filepath;
+ throw std::runtime_error(msg);
+ }
+
+ write_ini(fs, sections);
+
+ fs.close();
+}
+
+Section find(const Sections &sections, const std::string &name)
+{
+  for (auto &section : sections)
+ {
+ if (section.name == name)
+ return section;
+ }
+ Section not_found;
+ return not_found;
+}
+
+std::string find(const Section &section, const std::string &key)
+{
+ for (auto &item : section.items)
+ {
+ if (item.first == key)
+ return item.second;
+ }
+ return "";
+}
+
+} // namespace crew
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfigIni.h"
+#include "crew/PConfigIniDump.h"
+
+#include <gtest/gtest.h>
+
+#include <sstream>
+#include <stdexcept>
+
+TEST(ConfigIniTest, read_ini_non_exist_file)
+{
+ EXPECT_THROW(crew::read_ini("/hello/world/not_a_file"), std::runtime_error);
+}
+
+TEST(ConfigIniTest, read_ini_simple)
+{
+ std::stringstream ss;
+
+ ss << "[hello]\nkey=world\n";
+
+ auto str = ss.str();
+ auto sections = crew::read_ini(str.c_str(), str.length());
+ ASSERT_EQ(1UL, sections.size());
+
+ auto its = sections.begin();
+ ASSERT_NE(sections.end(), its);
+ EXPECT_TRUE("hello" == its->name);
+ ASSERT_EQ(1UL, its->items.size());
+
+ auto it = its->items.begin();
+ ASSERT_NE(its->items.end(), it);
+ EXPECT_TRUE("key" == it->first);
+ EXPECT_TRUE("world" == it->second);
+}
+
+TEST(ConfigIniTest, read_ini_simple_NEG)
+{
+ std::stringstream ss;
+
+ ss << "key=value\nhello=world\n";
+
+ auto str = ss.str();
+
+ EXPECT_THROW(crew::read_ini(str.c_str(), str.length()), std::runtime_error);
+}
+
+TEST(ConfigIniTest, read_ini_comment)
+{
+ std::stringstream ss;
+
+ ss << "[hello]\n;comment=skip\n#comment=skip\nkey=world\n";
+
+ auto str = ss.str();
+ auto sections = crew::read_ini(str.c_str(), str.length());
+ ASSERT_EQ(1UL, sections.size());
+
+ auto its = sections.begin();
+ ASSERT_NE(sections.end(), its);
+ EXPECT_TRUE("hello" == its->name);
+ ASSERT_EQ(1UL, its->items.size());
+
+ auto it = its->items.begin();
+ ASSERT_NE(its->items.end(), it);
+ EXPECT_TRUE("key" == it->first);
+ EXPECT_TRUE("world" == it->second);
+}
+
+TEST(ConfigIniTest, write_ini_file_error_NEG)
+{
+ crew::Sections sections;
+ EXPECT_THROW(crew::write_ini("/abc/def/cannot_access", sections), std::runtime_error);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfigIniDump.h"
+
+namespace crew
+{
+
+/**
+ * @brief Dump content of sections
+ */
+void dump(std::ostream &os, const Sections &sections)
+{
+  for (auto &section : sections)
+ {
+ os << "[" << section.name << "]" << std::endl;
+ for (auto &item : section.items)
+ {
+ os << item.first << "=" << item.second << std::endl;
+ }
+ os << std::endl;
+ }
+}
+
+} // namespace crew
+
+std::ostream &operator<<(std::ostream &os, const crew::Sections &sections)
+{
+ crew::dump(os, sections);
+ return os;
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfigIni.h"
+#include "crew/PConfigIniDump.h"
+
+#include <gtest/gtest.h>
+
+#include <sstream>
+#include <stdexcept>
+
+TEST(ConfigIniDumpTest, dump_sections)
+{
+ crew::Sections sections;
+ crew::Section section;
+
+ section.name = "hello";
+ section.items["key"] = "value";
+
+ sections.push_back(section);
+
+ std::stringstream ss;
+
+ ss << sections;
+
+  // there's an extra \n at the end of each section
+ ASSERT_TRUE(ss.str() == "[hello]\nkey=value\n\n");
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PConfigJson.h"
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+const char _CLF = '\n'; // Control Line Feed
+const char _DQU = '\"'; // Double QUotation
+
+} // namespace
+
+namespace crew
+{
+
+void JsonExport::indent(void)
+{
+ for (uint32_t i = 0; i < _indent; ++i)
+ _os << " ";
+}
+
+void JsonExport::open_brace(void)
+{
+ indent();
+
+ _os << "{" << _CLF;
+ _indent++;
+}
+
+void JsonExport::open_brace(const std::string &key)
+{
+ indent();
+
+ _os << _DQU << key << _DQU << " : {" << _CLF;
+ _indent++;
+}
+
+void JsonExport::open_bracket(const std::string &key)
+{
+ indent();
+
+ _os << _DQU << key << _DQU << " : [" << _CLF;
+ _indent++;
+}
+
+void JsonExport::close_bracket(bool cont)
+{
+ _indent--;
+ indent();
+
+ _os << "]";
+ if (cont)
+ _os << ",";
+ _os << _CLF;
+}
+
+void JsonExport::close_brace(bool cont)
+{
+ _indent--;
+ indent();
+
+ _os << "}";
+ if (cont)
+ _os << ",";
+ _os << _CLF;
+}
+
+void JsonExport::key_val(const std::string &key, const std::string &value, bool cont)
+{
+ indent();
+
+ _os << _DQU << key << _DQU << " : " << _DQU << value << _DQU;
+ if (cont)
+ _os << ",";
+ _os << _CLF;
+}
+
+void JsonExport::key_val(const std::string &key, const std::vector<std::string> &l, bool cont)
+{
+ indent();
+
+ _os << _DQU << key << _DQU << " : [ ";
+ bool comma = false;
+ for (auto &v : l)
+ {
+ if (comma)
+ _os << ", ";
+ else
+ comma = true;
+ _os << _DQU << v << _DQU;
+ }
+ _os << " ]";
+ if (cont)
+ _os << ",";
+ _os << _CLF;
+}
+
+} // namespace crew
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CREW_PCONFIG_JSON_H__
+#define __CREW_PCONFIG_JSON_H__
+
+#include <ostream>
+#include <string>
+#include <vector>
+
+namespace crew
+{
+
+class JsonExport
+{
+public:
+ JsonExport(std::ostream &os) : _os(os) {}
+
+private:
+ void indent(void);
+
+public:
+ void open_brace(void);
+ void open_brace(const std::string &key);
+ void open_bracket(const std::string &key);
+ void close_bracket(bool cont);
+ void close_brace(bool cont);
+ void key_val(const std::string &key, const std::string &value, bool cont);
+ void key_val(const std::string &key, const std::vector<std::string> &l, bool cont);
+
+private:
+ std::ostream &_os;
+ uint32_t _indent = 0;
+};
+
+} // namespace crew
+
+#endif // __CREW_PCONFIG_JSON_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PConfigJson.h"
+
+#include <gtest/gtest.h>
+
+#include <sstream>
+
+TEST(ConfigJsonTest, empty)
+{
+ std::stringstream ss;
+ crew::JsonExport je(ss);
+
+ je.open_brace();
+ je.close_brace(true);
+
+ ASSERT_TRUE(ss.str() == "{\n},\n");
+}
+
+TEST(ConfigJsonTest, keyvalue)
+{
+ std::stringstream ss;
+ crew::JsonExport je(ss);
+
+ je.open_brace("hello");
+ je.key_val("key", "value", true);
+ je.close_brace(true);
+
+ ASSERT_TRUE(ss.str() == "\"hello\" : {\n \"key\" : \"value\",\n},\n");
+}
+
+TEST(ConfigJsonTest, keyvaluearray)
+{
+ std::stringstream ss;
+ crew::JsonExport je(ss);
+ std::vector<std::string> vs = {"1", "2"};
+
+ je.open_brace("hello");
+ je.key_val("key", vs, true);
+ je.close_brace(true);
+
+ ASSERT_TRUE(ss.str() == "\"hello\" : {\n \"key\" : [ \"1\", \"2\" ],\n},\n");
+}
+
+TEST(ConfigJsonTest, bracket)
+{
+ std::stringstream ss;
+ crew::JsonExport je(ss);
+
+ je.open_bracket("hello");
+ je.close_bracket(true);
+
+ ASSERT_TRUE(ss.str() == "\"hello\" : [\n],\n");
+}
return fd;
}
-} // namespace make_temp
+} // namespace
TEST(FildesTest, default_constructor)
{
target_link_libraries(enco-cli enco_intf_cmdline)
target_link_libraries(enco-cli enco_intf_frontend)
target_link_libraries(enco-cli enco_core)
-target_link_libraries(enco-cli stdex)
target_link_libraries(enco-cli dl)
# Let's use project-wide compile options
target_link_libraries(enco-cli nncc_common)
} // namespace
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <iostream>
std::map<std::string, std::function<void(const std::string &arg)>> argparse;
argparse["--frontend"] = [&](const std::string &path) {
- frontend_zone = stdex::make_unique<FrontendZone>(path);
+ frontend_zone = std::make_unique<FrontendZone>(path);
};
argparse["--frontend-arg"] = [&](const std::string &arg) { frontend_zone->append(arg); };
# These libraries are linked for internal use, and thus does not appear in public headers.
target_link_libraries(enco_core PRIVATE pp)
target_link_libraries(enco_core PRIVATE morph)
-target_link_libraries(enco_core PRIVATE stdex)
# Let's use nncc project-wide build options
target_link_libraries(enco_core PRIVATE nncc_common)
{
public:
ANNBinder(coco::Block *block, std::unique_ptr<ann::Module> &&module)
- : _block{block}, _module{std::move(module)}
+ : _block{block}, _module{std::move(module)}
{
// DO NOTHING
}
#include "ANN/Context.h"
-#include <stdex/Memory.h>
+#include <memory>
ANNBinder *ANNContext::create(coco::Block *blk)
{
- auto mod = stdex::make_unique<ann::Module>();
- auto obj = stdex::make_unique<ANNBinder>(blk, std::move(mod));
+ auto mod = std::make_unique<ann::Module>();
+ auto obj = std::make_unique<ANNBinder>(blk, std::move(mod));
auto ptr = obj.get();
_binders.emplace_back(std::move(obj));
protected:
std::unique_ptr<coco::Module> m;
};
-}
+} // namespace
TEST_F(ANNContextTest, constructor)
{
#include "ANN/IR/OperandInventory.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace ann
{
public:
Operation(const Code &code, std::initializer_list<OperandID> inputs,
std::initializer_list<OperandID> outputs)
- : _code{code}, _inputs{inputs}, _outputs{outputs}
+ : _code{code}, _inputs{inputs}, _outputs{outputs}
{
// DO NOTHING
}
#include "OperationInventory.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace ann
{
#include "WeightInventory.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace ann
{
{
public:
AsmCode(const std::string &filename, const std::string &varname)
- : _filename{filename}, _varname{varname}
+ : _filename{filename}, _varname{varname}
{
// DO NOTHING
}
#include "Transforms/Split.h"
#include "Transforms/GlobalDataGeneration.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <stdexcept>
#include <iostream>
#include <fstream>
-using stdex::make_unique;
+using std::make_unique;
using namespace enco;
namespace
ofs << CppCode{data_var, code(sess)} << std::endl;
}
-} // namespace enco
+} // namespace
#include <iostream>
public:
CodeIndex(const coco::BlockIndex &blk_ind, const coco::InstrIndex &ins_ind)
- : _blk_ind{blk_ind}, _ins_ind{ins_ind}
+ : _blk_ind{blk_ind}, _ins_ind{ins_ind}
{
}
#include <pp/EnclosedDocument.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <string>
public:
TransferLoop(uint32_t count, uint32_t src_step, uint32_t dst_step)
- : _count{count}, _step{src_step, dst_step}
+ : _count{count}, _step{src_step, dst_step}
{
// DO NOTHING
}
{
InstrPrinter prn{_mem};
- auto res = stdex::make_unique<pp::LinearDocument>();
+ auto res = std::make_unique<pp::LinearDocument>();
for (auto ins = blk->instr()->head(); ins; ins = ins->next())
{
#include <pp/LinearDocument.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <sstream>
-using stdex::make_unique;
+using std::make_unique;
using enco::concat;
#define S(content) #content
{
public:
ScalarOperandDecl(const std::string &model, const ann::DType &dtype)
- : _model{model}, _dtype{dtype}
+ : _model{model}, _dtype{dtype}
{
// DO NOTHING
}
public:
TensorOperandDecl(const std::string &model, const ann::DType &dtype,
const nncc::core::ADT::tensor::Shape &shape)
- : _model{model}, _dtype{dtype}, _shape{shape}
+ : _model{model}, _dtype{dtype}, _shape{shape}
{
// DO NOTHING
}
public:
WeightDecl(const std::string &model, const ann::OperandID &id, const std::string &base,
const std::string &size)
- : _model{model}, _id{id}, _base{base}, _size{size}
+ : _model{model}, _id{id}, _base{base}, _size{size}
{
// DO NOTHING
}
#include "Session.h"
-#include <stdex/Memory.h>
-
#include <map>
#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
}
SectionBuilder section(const std::string &tag) { return SectionBuilder{tag}; }
-}
+} // namespace
/**
* SECTION: Bag
#include "FeatureUnification.h"
#include "IRUtils.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <set>
#include <vector>
#include <cassert>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
#include "Split.h"
#include "Dims.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
#include <coco/IR.h>
#include <nncc/core/ADT/kernel/NHWCLayout.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <map>
#include <stdexcept>
#include <functional>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
auto ofm = binder->addOperand<float>(_ofm);
binder->addOperation(
- ann::Operation::Code::DEPTHWISE_CONV_2D,
- {ifm, ker, bias, left, right, top, bottom, hstride, vstride, multiplier, fuse}, {ofm});
+ ann::Operation::Code::DEPTHWISE_CONV_2D,
+ {ifm, ker, bias, left, right, top, bottom, hstride, vstride, multiplier, fuse}, {ofm});
}
private:
void run(const SessionID &sess) const override { split_into_phases(code(sess)); }
};
-} // namespace enco;
+} // namespace enco
#endif // __SPLIT_H__
target_link_libraries(enco_caffe_frontend enco_intf_cmdline)
target_link_libraries(enco_caffe_frontend morph)
target_link_libraries(enco_caffe_frontend caffeproto)
-target_link_libraries(enco_caffe_frontend stdex)
nnas_find_package(GTest QUIET)
explicit GraphBuilderContext(coco::Module *module, coco::Data *data, coco::Block *block,
ShapeContext &shape_ctx, StoreContext &bag_ctx,
WeightContext &weight_ctx)
- : _module(module), _data(data), _block(block), _shape_ctx(shape_ctx), _bag_ctx(bag_ctx),
- _weight_ctx(weight_ctx)
+ : _module(module), _data(data), _block(block), _shape_ctx(shape_ctx), _bag_ctx(bag_ctx),
+ _weight_ctx(weight_ctx)
{
// DO NOTHING
}
#include <cmdline/View.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <fstream>
#include <cassert>
{
assert(cmdline.size() == 2);
- auto frontend = stdex::make_unique<Frontend>();
+ auto frontend = std::make_unique<Frontend>();
// Fill prototxt
{
#include "Layer/Scale.h"
#include "Layer/BatchNorm.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace caffeimport
{
auto ker_dst = data->f32()->access(ker_obj);
auto ker_src = kernel::OverlayFactory<float, kernel::NCHWLayout>::make(
- ker_obj->shape(), ker_blob->mutable_data()->begin());
+ ker_obj->shape(), ker_blob->mutable_data()->begin());
for (uint32_t n = 0; n < ker_obj->shape().count(); ++n)
{
target_link_libraries(enco_tflite_frontend enco_intf_cmdline)
target_link_libraries(enco_tflite_frontend flatbuffers)
target_link_libraries(enco_tflite_frontend enco_tflite_schema)
-target_link_libraries(enco_tflite_frontend stdex)
target_link_libraries(enco_tflite_frontend morph)
target_link_libraries(enco_tflite_frontend cwrap)
}
TflOpCodeContext::TflOpCodeContext(
- const flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>> *opcodes)
+ const flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>> *opcodes)
{
for (const tflite::OperatorCode *opcode : *opcodes)
{
explicit GraphBuilderContext(coco::Module *m, coco::Data *d, coco::Block *block,
TensorBags &tensor_bags, TensorContext &tensor_context,
TflBufferContext &buffer_context, const tflite::SubGraph *graph)
- : _m(m), _d(d), _block(block), _tensor_bags(tensor_bags), _tensor_context(tensor_context),
- _buffer_context(buffer_context), _graph(graph)
+ : _m(m), _d(d), _block(block), _tensor_bags(tensor_bags), _tensor_context(tensor_context),
+ _buffer_context(buffer_context), _graph(graph)
{
// DO NOTHING
}
#include <cmdline/View.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <fstream>
#include <cassert>
-using stdex::make_unique;
+using std::make_unique;
extern "C" std::unique_ptr<enco::Frontend> make_frontend(const cmdline::View &cmdline)
{
#include "Frontend.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
#include "Op/Div.h"
#include <schema_generated.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <map>
-using stdex::make_unique;
+using std::make_unique;
namespace tflimport
{
// add GraphBuilder for each tflite operation.
_builder_map[tflite::BuiltinOperator_CONV_2D] = make_unique<Conv2DGraphBuilder>();
_builder_map[tflite::BuiltinOperator_DEPTHWISE_CONV_2D] =
- make_unique<DepthwiseConv2DGraphBuilder>();
+ make_unique<DepthwiseConv2DGraphBuilder>();
_builder_map[tflite::BuiltinOperator_AVERAGE_POOL_2D] = make_unique<AvgPool2DGraphBuilder>();
_builder_map[tflite::BuiltinOperator_MAX_POOL_2D] = make_unique<MaxPool2DGraphBuilder>();
_builder_map[tflite::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationGraphBuilder>();
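A small, hedged sketch of the registry pattern above: a map from operator code to an owning GraphBuilder pointer, looked up at import time. The enum values and builder classes here are illustrative, not the actual tflite schema types.

#include <iostream>
#include <map>
#include <memory>

// Illustrative operator codes and builder interface
enum class BuiltinOperator
{
  CONV_2D,
  DEPTHWISE_CONV_2D,
  AVERAGE_POOL_2D
};

struct GraphBuilder
{
  virtual ~GraphBuilder() = default;
  virtual void build() const = 0;
};

struct Conv2DGraphBuilder final : GraphBuilder
{
  void build() const override { std::cout << "build CONV_2D" << std::endl; }
};

struct AvgPool2DGraphBuilder final : GraphBuilder
{
  void build() const override { std::cout << "build AVERAGE_POOL_2D" << std::endl; }
};

int main()
{
  std::map<BuiltinOperator, std::unique_ptr<GraphBuilder>> builder_map;
  builder_map[BuiltinOperator::CONV_2D] = std::make_unique<Conv2DGraphBuilder>();
  builder_map[BuiltinOperator::AVERAGE_POOL_2D] = std::make_unique<AvgPool2DGraphBuilder>();

  // Dispatch by looking up the opcode; unknown ops simply have no builder registered
  auto it = builder_map.find(BuiltinOperator::CONV_2D);
  if (it != builder_map.end())
    it->second->build();

  return 0;
}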
coco_avgpool2d->stride()->horizontal(params->stride_w());
coco::Padding2D padding =
- pool2D_padding(params, ifm_shape, params->filter_width(), params->filter_height());
+ pool2D_padding(params, ifm_shape, params->filter_width(), params->filter_height());
coco_avgpool2d->pad()->top(padding.top());
coco_avgpool2d->pad()->bottom(padding.bottom());
// fused activation
coco::FeatureObject *act_output =
- build_activation(conv_params->fused_activation_function(), blk, last_obj);
+ build_activation(conv_params->fused_activation_function(), blk, last_obj);
// Create Copy Instr of last_obj to Output Object
auto copy_ins = instr_builder(m).copy(ofm_obj, act_output);
auto wc = new_shape.width() * new_shape.depth();
ker_spn[n * hwc + h * wc + w * new_shape.depth() + c] =
- buffer.ptr[tfl_n * hw * new_shape.count() + /* new_shape.count() is old c */
- h * new_shape.width() * new_shape.count() + w * new_shape.count() + tfl_c];
+ buffer.ptr[tfl_n * hw * new_shape.count() + /* new_shape.count() is old c */
+ h * new_shape.width() * new_shape.count() + w * new_shape.count() + tfl_c];
}
}
}
// fused activation
coco::FeatureObject *act_output =
- build_activation(dconv_params->fused_activation_function(), blk, last_obj);
+ build_activation(dconv_params->fused_activation_function(), blk, last_obj);
// Create Copy Instr of last_obj to Output Object
auto copy_ins = instr_builder(m).copy(ofm_obj, act_output);
coco_maxpool2d->stride()->horizontal(params->stride_w());
coco::Padding2D padding =
- pool2D_padding(params, ifm_shape, params->filter_width(), params->filter_height());
+ pool2D_padding(params, ifm_shape, params->filter_width(), params->filter_height());
coco_maxpool2d->pad()->top(padding.top());
coco_maxpool2d->pad()->bottom(padding.bottom());
add_library(${PREFIX}-frontend SHARED enco.test.cpp)
target_link_libraries(${PREFIX}-frontend enco_intf_cmdline)
target_link_libraries(${PREFIX}-frontend enco_intf_frontend)
-target_link_libraries(${PREFIX}-frontend stdex)
# NOTE BYPRODUCTS are not specified in order to enforce source code generation
add_custom_command(OUTPUT ${GENERATED_CPP} ${GENERATED_ASM} ${GENERATED_BIN}
#include <nncc/core/ADT/tensor/LexicalLayout.h>
-#include <stdex/Memory.h>
+#include <memory>
using namespace nncc::core::ADT;
extern "C" std::unique_ptr<enco::Frontend> make_frontend(const cmdline::View &cmdline)
{
- return stdex::make_unique<Frontend>();
+ return std::make_unique<Frontend>();
}
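The extern "C" factory above is how a frontend is exposed from a shared object. As a hedged sketch (assuming POSIX dlopen/dlsym, a hypothetical library path, and stand-in Frontend/View types rather than the real enco and cmdline headers), a host program would resolve and call it roughly like this:

#include <dlfcn.h>

#include <iostream>
#include <memory>

// Hypothetical stand-ins for enco::Frontend and cmdline::View
struct Frontend
{
  virtual ~Frontend() = default;
};
struct View
{
};

int main()
{
  // NOTE the library path is illustrative only
  void *handle = dlopen("./libenco-frontend.so", RTLD_NOW);
  if (handle == nullptr)
  {
    std::cerr << dlerror() << std::endl;
    return 1;
  }

  using FactoryFn = std::unique_ptr<Frontend> (*)(const View &);
  auto factory = reinterpret_cast<FactoryFn>(dlsym(handle, "make_frontend"));
  if (factory == nullptr)
  {
    std::cerr << dlerror() << std::endl;
    return 1;
  }

  View cmdline;
  std::unique_ptr<Frontend> frontend = factory(cmdline);
  // ... use frontend ...

  frontend.reset();
  dlclose(handle);
  return 0;
}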
#include <nncc/core/ADT/tensor/LexicalLayout.h>
#include <nncc/core/ADT/tensor/Overlay.h>
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
using namespace nncc::core::ADT;
namespace
target_link_libraries(${BINDER_TARGET} nnkit_intf_backend)
target_link_libraries(${BINDER_TARGET} ann_api)
target_link_libraries(${BINDER_TARGET} ann_ref_static)
- target_link_libraries(${BINDER_TARGET} stdex)
set_target_properties(${BINDER_TARGET} PROPERTIES OUTPUT_NAME ${PREFIX})
list(APPEND TESTS ${PREFIX})
target_link_libraries(${BINDER_TARGET} nnkit_intf_backend)
target_link_libraries(${BINDER_TARGET} ann_api)
target_link_libraries(${BINDER_TARGET} ann_ref_static)
- target_link_libraries(${BINDER_TARGET} stdex)
set_target_properties(${BINDER_TARGET} PROPERTIES OUTPUT_NAME ${PREFIX})
list(APPEND TESTS ${PREFIX})
target_link_libraries(encodump enco_intf_frontend)
target_link_libraries(encodump enco_core)
target_link_libraries(encodump safemain)
-target_link_libraries(encodump stdex)
target_link_libraries(encodump dl)
} // namespace
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <iostream>
std::map<std::string, std::function<void(const std::string &arg)>> argparse;
argparse["--frontend"] = [&](const std::string &path) {
- frontend_zone = stdex::make_unique<FrontendZone>(path);
+ frontend_zone = std::make_unique<FrontendZone>(path);
};
argparse["--frontend-arg"] = [&](const std::string &arg) { frontend_zone->append(arg); };
target_link_libraries(exo PUBLIC exo_tflite_fbs)
target_link_libraries(exo PUBLIC exo_circle_fbs)
target_link_libraries(exo PUBLIC loco)
-target_link_libraries(exo PRIVATE stdex)
target_link_libraries(exo PRIVATE pepper_str)
target_link_libraries(exo PRIVATE pepper_strcast)
target_link_libraries(exo PRIVATE locoex_customop)
GTest_AddTest(exo_test ${TESTS})
target_include_directories(exo_test PRIVATE src)
-target_link_libraries(exo_test stdex)
target_link_libraries(exo_test pepper_str)
target_link_libraries(exo_test exo)
target_link_libraries(exo_test hermes_std)
-require("stdex")
require("loco")
require("locoex-customop")
require("logo")
#include "CircleExporterImpl.h"
-#include <stdex/Memory.h>
-
#include <oops/InternalExn.h>
+#include <memory>
#include <fstream>
namespace exo
{
-CircleExporter::CircleExporter(loco::Graph *graph) : _impl(stdex::make_unique<Impl>(graph))
+CircleExporter::CircleExporter(loco::Graph *graph) : _impl(std::make_unique<Impl>(graph))
{
// NOTHING TO DO
}
INTERNAL_EXN("Cannot find code for customop even though opcode is BuiltinOperator_CUSTOM");
operator_codes_vec[idx] =
- CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
+ CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
}
}
return builder.CreateVector(operator_codes_vec);
// encode operator codes
auto operator_codes =
- encodeOperatorCodes(_builder, gd._operator_codes, gd._custom_operator_codes);
+ encodeOperatorCodes(_builder, gd._operator_codes, gd._custom_operator_codes);
// Subgraphs
Offset<SubGraph> subgraph = exportSubgraph(gd);
//
// NOTE input and output 'feature' maps are in NHWC shape
bool same_padding_criterion_1 =
- (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
- (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
+ (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
+ (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
// For SAME padding, rear padding is equal to front padding or larger by at most 1
bool same_padding_criterion_2 =
- (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
- (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
+ (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
+ (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
if (same_padding_criterion_1 && same_padding_criterion_2)
return circle::Padding_SAME;
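To make the two criteria above concrete, here is a small standalone check using the same integer arithmetic (the shapes and pad values are illustrative only): for an IFM with H = W = 5 and a 2x2 stride, SAME padding expects OFM H = (5 - 1) / 2 + 1 = 3 and W = 3, and rear padding may exceed front padding by at most one.

#include <cassert>
#include <cstdint>

int main()
{
  // Illustrative NHWC dims = {N, H, W, C}; only H and W matter here
  const uint32_t ifm_h = 5, ifm_w = 5;
  const uint32_t stride_v = 2, stride_h = 2;

  // Criterion 1: OFM spatial dims equal ceil(IFM / stride), written with integer math
  const uint32_t ofm_h = (ifm_h - 1) / stride_v + 1; // 3
  const uint32_t ofm_w = (ifm_w - 1) / stride_h + 1; // 3
  assert(ofm_h == 3 && ofm_w == 3);

  // Criterion 2: rear padding is equal to front padding or larger by at most one
  const uint32_t pad_top = 0, pad_bottom = 1, pad_left = 0, pad_right = 1;
  const bool criterion_2 = (pad_top <= pad_bottom) && (pad_bottom <= pad_top + 1) &&
                           (pad_left <= pad_right) && (pad_right <= pad_left + 1);
  assert(criterion_2);

  return 0;
}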
gd._data_format = circle::DataFormat::DataFormat_CHANNELS_LAST;
}
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
void set_tensor_index(loco::Node *node, const TFLTensorIndex &tensor_id)
{
assert(node->annot<TFLTensorIndexAnnotation>() == nullptr);
- node->annot(stdex::make_unique<TFLTensorIndexAnnotation>(tensor_id));
+ node->annot(std::make_unique<TFLTensorIndexAnnotation>(tensor_id));
}
TFLTensorIndex get_tensor_index(loco::Node *node)
void visit(loco::ReLU *) final;
void visit(loco::ReLU6 *) final;
void visit(loco::Tanh *) final;
- void visit(loco::Push *) final { /* DO NOTHING */}
- void visit(loco::Pull *) final { /* DO NOTHING */}
+ void visit(loco::Push *) final
+ { /* DO NOTHING */
+ }
+ void visit(loco::Pull *) final
+ { /* DO NOTHING */
+ }
void visit(loco::FeatureEncode *) final;
void visit(loco::FeatureDecode *) final;
void visit(loco::FilterEncode *) final;
void visit(loco::DepthwiseFilterEncode *) final;
- void visit(loco::ConstGen *) final { /* skip, everything is done in exportOpDefinedTensors */}
+ void visit(loco::ConstGen *) final
+ { /* skip, everything is done in exportOpDefinedTensors */
+ }
void visit(loco::MaxPool2D *) final;
void visit(loco::AvgPool2D *) final;
void visit(loco::Conv2D *) final;
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
auto options =
- CreateFullyConnectedOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
+ CreateFullyConnectedOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
// Make FULLY_CONNECTED operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
auto options = CreateTransposeOptions(builder);
auto op_offset =
- CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
+ CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
gd._operators.push_back(op_offset);
}
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(node->padding());
auto options =
- CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
+ CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
// Make TRANSPOSE_CONV operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
void OperationExporter::export_pool_2d(TFLPool2D *node, circle::BuiltinOperator builtin_op)
{
EXO_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
- builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
+ builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
"should be maxpool or avgpool");
EXO_ASSERT(node->padding() != locoex::Padding::UNDEFINED, "Padding is not set");
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), node->window()->horizontal(),
- node->window()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreatePool2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical(),
+ node->window()->horizontal(), node->window()->vertical());
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
circle::BuiltinOptions_Pool2DOptions, options.Union());
gd._operators.push_back(op_offset);
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), node->window()->horizontal(),
- node->window()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreatePool2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical(),
+ node->window()->horizontal(), node->window()->vertical());
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
circle::BuiltinOptions_Pool2DOptions, options.Union());
gd._operators.push_back(op_offset);
std::vector<float> bias_vec_data(bias_vec_size); // initialized as zero vector
auto bias_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
auto bias_tensor_offset =
- CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+ CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
gd._tensors.push_back(bias_tensor_offset);
// Make input, output and options for operator
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreateConv2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreateConv2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical());
// Make CONV_2D operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
}
auto outshape_vec_offset = builder.CreateVector(
- reinterpret_cast<uint8_t *>(outshape_vec_data.data()), raw_outshape_vec_size);
+ reinterpret_cast<uint8_t *>(outshape_vec_data.data()), raw_outshape_vec_size);
auto outshape_buffer_offset = CreateBuffer(builder, outshape_vec_offset);
size_t raw_bias_vec_size = bias_vec_size * sizeof(int32_t);
std::vector<float> bias_vec_data(bias_vec_size);
auto bias_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
auto bias_tensor_offset =
- CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+ CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
gd._tensors.push_back(bias_tensor_offset);
std::vector<int32_t> inputs_vec{get_tensor_index(node->ifm()), get_tensor_index(node->ker()),
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
int32_t ifm_channel_size = ShapeInference::get(node->ifm())._dims[3];
// multiplier = bias_vec_size(output_size)/ifm_channel_size
auto options =
- CreateDepthwiseConv2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), bias_vec_size / ifm_channel_size);
+ CreateDepthwiseConv2DOptions(builder, padding, node->stride()->horizontal(),
+ node->stride()->vertical(), bias_vec_size / ifm_channel_size);
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
circle::BuiltinOptions_DepthwiseConv2DOptions, options.Union());
size_t raw_axes_vec_size = axes_vec_size * sizeof(int32_t);
auto axes_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(axes_vec.data()), raw_axes_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(axes_vec.data()), raw_axes_vec_size);
auto axes_buffer_offset = CreateBuffer(builder, axes_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(axes_tensor_id));
auto axes_tensor_offset =
- CreateTensor(builder, axes_vec_shape_offset, TensorType_INT32, axes_buffer_id, name_offset);
+ CreateTensor(builder, axes_vec_shape_offset, TensorType_INT32, axes_buffer_id, name_offset);
gd._tensors.push_back(axes_tensor_offset);
std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), axes_tensor_id};
constexpr size_t raw_perm_vec_size = perm_vec_size * sizeof(int32_t);
auto perm_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(perm_vec_data.data()), raw_perm_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(perm_vec_data.data()), raw_perm_vec_size);
auto perm_buffer_offset = CreateBuffer(builder, perm_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(perm_tensor_id));
auto perm_tensor_offset =
- CreateTensor(builder, perm_vec_shape_offset, TensorType_INT32, perm_buffer_id, name_offset);
+ CreateTensor(builder, perm_vec_shape_offset, TensorType_INT32, perm_buffer_id, name_offset);
gd._tensors.push_back(perm_tensor_offset);
// Create permutation node
constexpr auto options_type = circle::BuiltinOptions::BuiltinOptions_TransposeOptions;
auto transpose_offset =
- CreateOperator(builder, op_idx, inputs, outputs, options_type, options.Union());
+ CreateOperator(builder, op_idx, inputs, outputs, options_type, options.Union());
gd._operators.push_back(transpose_offset);
}
// but also by input.
auto input_shape_shape_vec_offset =
- builder.CreateVector(std::vector<int32_t>{(int32_t)new_shape_vec.size()});
+ builder.CreateVector(std::vector<int32_t>{(int32_t)new_shape_vec.size()});
size_t input_shape_vec_size = new_shape_vec.size() * sizeof(int32_t);
auto input_shape_input_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(new_shape_vec.data()), input_shape_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(new_shape_vec.data()), input_shape_vec_size);
auto input_shape_buffer_offset = CreateBuffer(builder, input_shape_input_vec_offset);
const auto input_shape_buffer_id = static_cast<uint32_t>(gd._buffers.size());
auto input_shape_tensor_id = static_cast<int32_t>(gd._tensors.size());
auto name_offset = builder.CreateString("t_" + std::to_string(input_shape_tensor_id));
auto input_shape_tensor_offset = CreateTensor(
- builder, input_shape_shape_vec_offset, TensorType_INT32, input_shape_buffer_id, name_offset);
+ builder, input_shape_shape_vec_offset, TensorType_INT32, input_shape_buffer_id, name_offset);
gd._tensors.push_back(input_shape_tensor_offset);
uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_RESHAPE);
auto padding_shape_vec_ptr = builder.CreateVector(std::vector<int32_t>{padding_vec_size, 2});
// create tensor
auto padding_tensor_ptr =
- CreateTensor(builder, padding_shape_vec_ptr, TensorType_INT32, padding_buffer_id);
+ CreateTensor(builder, padding_shape_vec_ptr, TensorType_INT32, padding_buffer_id);
// get tensor id
const auto padding_tensor_id = static_cast<int32_t>(gd._tensors.size());
#include <oops/InternalExn.h>
-#include <stdex/Memory.h>
-
#include <stdexcept>
#include <type_traits>
reshape->tensor(filter_dec);
int32_t new_shape[4] = {
- 1, static_cast<int32_t>(filter_shape.height().value()),
- static_cast<int32_t>(filter_shape.width().value()),
- static_cast<int32_t>(filter_shape.depth().value() * filter_shape.multiplier().value())};
+ 1, static_cast<int32_t>(filter_shape.height().value()),
+ static_cast<int32_t>(filter_shape.width().value()),
+ static_cast<int32_t>(filter_shape.depth().value() * filter_shape.multiplier().value())};
locoex::set_new_shape(reshape, new_shape, 4);
tfl_dw_conv2d->filter(reshape);
#include <logo/RemoveForwardNodePass.h>
#include <logo/Phase.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace exo
{
logo::Phase phase;
{
// prepare type and shape before conversion
- phase.emplace_back(stdex::make_unique<TypeInferencePass>());
- phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<TypeInferencePass>());
+ phase.emplace_back(std::make_unique<ShapeInferencePass>());
// Add converters for canonical nodes. Note: Not all loco canonical nodes are listed.
- phase.emplace_back(stdex::make_unique<AvgPool2DConverter>());
- phase.emplace_back(stdex::make_unique<ConstGenConverter>());
- phase.emplace_back(stdex::make_unique<Conv2DConverter>());
- phase.emplace_back(stdex::make_unique<DepthwiseConv2DConverter>());
+ phase.emplace_back(std::make_unique<AvgPool2DConverter>());
+ phase.emplace_back(std::make_unique<ConstGenConverter>());
+ phase.emplace_back(std::make_unique<Conv2DConverter>());
+ phase.emplace_back(std::make_unique<DepthwiseConv2DConverter>());
// TODO loco::DepthwiseFilterEncode
- phase.emplace_back(stdex::make_unique<EltwiseAddConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseDivConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseMaxConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseMulConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseSqrtConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseSubConverter>());
- phase.emplace_back(stdex::make_unique<FeatureBiasAddConverter>());
+ phase.emplace_back(std::make_unique<EltwiseAddConverter>());
+ phase.emplace_back(std::make_unique<EltwiseDivConverter>());
+ phase.emplace_back(std::make_unique<EltwiseMaxConverter>());
+ phase.emplace_back(std::make_unique<EltwiseMulConverter>());
+ phase.emplace_back(std::make_unique<EltwiseSqrtConverter>());
+ phase.emplace_back(std::make_unique<EltwiseSubConverter>());
+ phase.emplace_back(std::make_unique<FeatureBiasAddConverter>());
// TODO loco::FixedReshape
- phase.emplace_back(stdex::make_unique<MatMulConverter>());
- phase.emplace_back(stdex::make_unique<MaxPool2DConverter>());
- phase.emplace_back(stdex::make_unique<ReluConverter>());
- phase.emplace_back(stdex::make_unique<Relu6Converter>());
+ phase.emplace_back(std::make_unique<MatMulConverter>());
+ phase.emplace_back(std::make_unique<MaxPool2DConverter>());
+ phase.emplace_back(std::make_unique<ReluConverter>());
+ phase.emplace_back(std::make_unique<Relu6Converter>());
// TODO loco::Tanh
- phase.emplace_back(stdex::make_unique<TensorConcatConverter>());
+ phase.emplace_back(std::make_unique<TensorConcatConverter>());
// TODO loco::TensorBiasAdd
- phase.emplace_back(stdex::make_unique<TensorBroadcastConverter>());
- phase.emplace_back(stdex::make_unique<TensorReduceConverter>());
+ phase.emplace_back(std::make_unique<TensorBroadcastConverter>());
+ phase.emplace_back(std::make_unique<TensorReduceConverter>());
// TODO loco::TensorSoftmax
- phase.emplace_back(stdex::make_unique<TensorTransposeConverter>());
- phase.emplace_back(stdex::make_unique<TransposedConv2DConverter>());
+ phase.emplace_back(std::make_unique<TensorTransposeConverter>());
+ phase.emplace_back(std::make_unique<TransposedConv2DConverter>());
// Add optimization below
- phase.emplace_back(stdex::make_unique<logo::SimplifyDomainConversionPass>());
- phase.emplace_back(stdex::make_unique<logo::RemoveForwardNodePass>());
- phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
+ phase.emplace_back(std::make_unique<logo::SimplifyDomainConversionPass>());
+ phase.emplace_back(std::make_unique<logo::RemoveForwardNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());
}
logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{graph};
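As a minimal sketch of the pass-pipeline pattern above (a vector of owning pass pointers populated with std::make_unique and driven by a runner until nothing changes), using hypothetical Pass/Phase types rather than the real logo API:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Hypothetical pass interface; the real project uses logo::Pass
struct Pass
{
  virtual ~Pass() = default;
  virtual std::string name() const = 0;
  virtual bool run() = 0; // returns true if the graph changed
};

struct ShapeInferencePass final : Pass
{
  std::string name() const override { return "ShapeInference"; }
  bool run() override { return false; }
};

struct RemoveDeadNodePass final : Pass
{
  std::string name() const override { return "RemoveDeadNode"; }
  bool run() override { return false; }
};

using Phase = std::vector<std::unique_ptr<Pass>>;

// Restart-style strategy: rerun the whole phase until no pass reports a change
void run_until_fixed_point(Phase &phase)
{
  bool changed = true;
  while (changed)
  {
    changed = false;
    for (auto &pass : phase)
    {
      std::cout << "Running " << pass->name() << std::endl;
      changed = pass->run() || changed;
    }
  }
}

int main()
{
  Phase phase;
  phase.emplace_back(std::make_unique<ShapeInferencePass>());
  phase.emplace_back(std::make_unique<RemoveDeadNodePass>());
  run_until_fixed_point(phase);
  return 0;
}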
* @brief INSTANCE_NORM in circle
*/
class CircleInstanceNorm final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::INSTANCE_NORM>>,
- public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::INSTANCE_NORM>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
/// @note Currently only support FLOAT32 as input node
public TFLNodeMixin<TFLNodeTrait::FusedActFunc>
{
public:
- TFLAveragePool2D() : _padding(Padding::UNDEFINED) { /* empty */}
+ TFLAveragePool2D() : _padding(Padding::UNDEFINED)
+ { /* empty */
+ }
public:
loco::Node *value(void) const { return at(0)->node(); }
* @brief DEPTHWISE_CONV_2D in TensorFlow Lite
*/
class TFLDepthwiseConv2D final
- : public FixedArityNode<3, TFLNodeImpl<TFLOpcode::DEPTHWISE_CONV_2D>>,
- public TFLNodeMixin<TFLNodeTrait::FusedActFunc>,
- public TFLNodeMixin<TFLNodeTrait::Bias>
+ : public FixedArityNode<3, TFLNodeImpl<TFLOpcode::DEPTHWISE_CONV_2D>>,
+ public TFLNodeMixin<TFLNodeTrait::FusedActFunc>,
+ public TFLNodeMixin<TFLNodeTrait::Bias>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
public TFLNodeMixin<TFLNodeTrait::FusedActFunc>
{
public:
- TFLMaxPool2D() : _padding(Padding::UNDEFINED) { /* empty */}
+ TFLMaxPool2D() : _padding(Padding::UNDEFINED)
+ { /* empty */
+ }
public:
loco::Node *value(void) const { return at(0)->node(); }
};
class TFLSquaredDifference final
- : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::SQUARED_DIFFERENCE>>
+ : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::SQUARED_DIFFERENCE>>
{
public:
TFLSquaredDifference() = default;
#include <loco/Service/CanonicalShapeInferenceRule.h>
#include <loco/Service/MultiDialectShapeInferenceRule.h>
-#include <stdex/Memory.h>
-
#include <gtest/gtest.h>
TEST(TFLShapeInferenceRuleTest, minimal_with_TFLRelu)
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule);
loco::apply(&rules).to(graph.g.get());
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule);
loco::apply(&rules).to(graph.g.get());
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule);
loco::apply(&rules).to(graph.g.get());
#include <loco/IR/CanonicalDialect.h>
#include <loco/Service/TypeInference.h>
-#include <stdex/Memory.h>
-
#include <gtest/gtest.h>
TEST(TFLTypeInferenceRuleTest, minimal_with_TFLRelu)
#include <locop/FormattedGraph.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace exo
{
public:
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tlb) const final
{
- return stdex::make_unique<NodeSummaryBuilder>(tlb);
+ return std::make_unique<NodeSummaryBuilder>(tlb);
}
};
#include <logo/Phase.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace exo
{
logo::Phase phase;
{
// prepare type and shape before optimization
- phase.emplace_back(stdex::make_unique<TypeInferencePass>());
- phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<TypeInferencePass>());
+ phase.emplace_back(std::make_unique<ShapeInferencePass>());
- phase.emplace_back(stdex::make_unique<FoldReshapeOfConstPass>());
- phase.emplace_back(stdex::make_unique<FoldTransposeOfConstPass>());
+ phase.emplace_back(std::make_unique<FoldReshapeOfConstPass>());
+ phase.emplace_back(std::make_unique<FoldTransposeOfConstPass>());
if (get<Knob::UseFuseBiasAddPass>())
{
- phase.emplace_back(stdex::make_unique<FuseBiasAddPass>());
+ phase.emplace_back(std::make_unique<FuseBiasAddPass>());
}
if (get<Knob::UseFuseInstanceNormPass>())
{
- phase.emplace_back(stdex::make_unique<FuseInstanceNormPass>());
+ phase.emplace_back(std::make_unique<FuseInstanceNormPass>());
}
if (get<Knob::UseFuseReluPass>())
{
- phase.emplace_back(stdex::make_unique<FuseReluPass>());
+ phase.emplace_back(std::make_unique<FuseReluPass>());
}
- phase.emplace_back(stdex::make_unique<FuseRsqrtPass>());
+ phase.emplace_back(std::make_unique<FuseRsqrtPass>());
if (get<Knob::UseFuseSquaredDifferencePass>())
{
- phase.emplace_back(stdex::make_unique<FuseSquaredDifferencePass>());
+ phase.emplace_back(std::make_unique<FuseSquaredDifferencePass>());
}
- phase.emplace_back(stdex::make_unique<MergeConcatNodesPass>());
+ phase.emplace_back(std::make_unique<MergeConcatNodesPass>());
- phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());
}
logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
#include "Check.h"
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
EXO_ASSERT(input_for_encode != nullptr, "input should not be nullptr");
loco::Graph *g = input_for_encode->graph();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
encoder->perm(perm<T>());
EXO_ASSERT(input_for_decode != nullptr, "input should not be nullptr");
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
decoder->perm(perm<T>());
EXO_ASSERT(input_for_encode != nullptr, "filter should not be nullptr");
loco::Graph *g = input_for_encode->graph();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
encoder->perm(perm<T>());
EXO_ASSERT(input_for_decode != nullptr, "filter should not be nullptr");
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
decoder->perm(perm<T>());
EXO_ASSERT(input_for_decode != nullptr, "filter should not be nullptr");
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::DepthwiseFilter>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::DepthwiseFilter>>();
decoder->perm(perm<T>());
EXO_ASSERT(input_for_encode != nullptr, "input should not be nullptr");
loco::Graph *g = input_for_encode->graph();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Matrix>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Matrix>>();
encoder->perm(perm<T>());
EXO_ASSERT(input_for_decode != nullptr, "input should not be nullptr");
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Matrix>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Matrix>>();
decoder->perm(perm<T>());
/// @brief Create a loco::MatrixDecode of given layout
template <MatrixLayout T> loco::MatrixDecode *make_matrix_decode(loco::Node *input_for_decode);
-} // exo
+} // namespace exo
//
// DomainConverter
#include "Log.h"
#include <hermes/ConsoleReporter.h>
-#include <stdex/Memory.h>
#include <cstdlib>
#include <iostream>
FormattedGraph fmt(loco::Graph *g)
{
- auto node_summary_builder = stdex::make_unique<NodeSummaryBuilderFactory>();
+ auto node_summary_builder = std::make_unique<NodeSummaryBuilderFactory>();
return std::move(locop::fmt<locop::LinearV1>(g).with(std::move(node_summary_builder)));
}
#include "Log.h" // To use LoggerConfig
#include <hermes/ConsoleReporter.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace exo
{
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<LoggerConfig>());
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<LoggerConfig>());
}
return ctx;
}
-} // namespac exo
+} // namespace exo
index_orig.at(perm->at<S32>(axis)) = index_new.at(axis);
const_new->at<FLOAT32>(l.offset(shape_new, index_new)) =
- const_orig->at<FLOAT32>(l.offset(shape_orig, index_orig));
+ const_orig->at<FLOAT32>(l.offset(shape_orig, index_orig));
}
// replace
Fuser(LatterT *latter)
{
static_assert(std::is_same<LatterT, locoex::TFLAdd>::value ||
- std::is_same<LatterT, locoex::TFLSub>::value,
+ std::is_same<LatterT, locoex::TFLSub>::value,
"wrong template type");
_latter = latter;
for (uint32_t x = 0; x < bias->dim(0).value(); x++)
new_bias->at<loco::DataType::FLOAT32>(x) = calc<LatterT>(
- bias->at<loco::DataType::FLOAT32>(x), _const_node->at<loco::DataType::FLOAT32>(x));
+ bias->at<loco::DataType::FLOAT32>(x), _const_node->at<loco::DataType::FLOAT32>(x));
}
return new_bias;
void setCandidate(FormerT *former, LatterT *latter, locoex::TFLConst *const_node)
{
static_assert(std::is_same<LatterT, locoex::TFLAdd>::value ||
- std::is_same<LatterT, locoex::TFLSub>::value,
+ std::is_same<LatterT, locoex::TFLSub>::value,
"wrong template type");
if (!check_act_func(former))
return;
auto depth =
- loco::shape_get(as_loco_node(former)).template as<loco::TensorShape>().dim(3).value();
+ loco::shape_get(as_loco_node(former)).template as<loco::TensorShape>().dim(3).value();
auto const_shape = loco::shape_get(const_node).template as<loco::TensorShape>();
if (const_shape.rank() == 1 and const_shape.dim(0) == depth)
CHECK_OR_FALSE(add_as_variance);
CHECK_OR_FALSE(
- fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
+ fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
// TODO Support regarding broadcast
locoex::TFLMul *mul_gamma_should_be = nullptr;
locoex::TFLMean *mean_of_ifm_should_be = nullptr;
CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_ifm_should_be)
- .with_commutative_args_of(mul_as_scaled_mean));
+ .with_commutative_args_of(mul_as_scaled_mean));
CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
#undef CHECK_OR_FALSE
{
static_assert((std::is_same<FusedTFLType, locoex::TFLRelu>::value &&
FusedActFunc == locoex::FusedActFunc::RELU) ||
- (std::is_same<FusedTFLType, locoex::TFLRelu6>::value &&
- FusedActFunc == locoex::FusedActFunc::RELU6),
+ (std::is_same<FusedTFLType, locoex::TFLRelu6>::value &&
+ FusedActFunc == locoex::FusedActFunc::RELU6),
"wrong template type");
exo::test::TestGraph g;
case locoex::FusedActFunc::RELU6:
return true;
- // case locoex::FusedActFunc::TANH:
- // return false;
+ // case locoex::FusedActFunc::TANH:
+ // return false;
default:
INTERNAL_EXN_V("Unknown FusedActFunc", oops::to_uint32(node1->fusedActivationFunction()));
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule)
- .bind(locoex::CircleDialect::get(), &circle_rule)
- .bind(locoex::COpDialect::get(), &cop_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule)
+ .bind(locoex::CircleDialect::get(), &circle_rule)
+ .bind(locoex::COpDialect::get(), &cop_rule);
return loco::apply(&rules).to(g);
}
loco::MultiDialectTypeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule)
- .bind(locoex::CircleDialect::get(), &circle_rule)
- .bind(locoex::COpDialect::get(), &cop_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule)
+ .bind(locoex::CircleDialect::get(), &circle_rule)
+ .bind(locoex::COpDialect::get(), &cop_rule);
return loco::apply(&rules).to(g);
}
{
public:
ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
- : _graph{graph}, _strategy{strategy}
+ : _graph{graph}, _strategy{strategy}
{
// DO NOTHING
}
#include "TFLExporterImpl.h"
-#include <stdex/Memory.h>
-
#include <oops/InternalExn.h>
+#include <memory>
#include <fstream>
namespace exo
{
-TFLExporter::TFLExporter(loco::Graph *graph) : _impl(stdex::make_unique<Impl>(graph))
+TFLExporter::TFLExporter(loco::Graph *graph) : _impl(std::make_unique<Impl>(graph))
{
// NOTHING TO DO
}
INTERNAL_EXN("Cannot find code for custom op");
operator_codes_vec[idx] =
- CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
+ CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
}
}
return builder.CreateVector(operator_codes_vec);
// encode operator codes
auto operator_codes =
- encodeOperatorCodes(_builder, gd._operator_codes, gd._custom_operator_codes);
+ encodeOperatorCodes(_builder, gd._operator_codes, gd._custom_operator_codes);
// Subgraphs
Offset<SubGraph> subgraph = exportSubgraph(gd);
#include "Knob.h"
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
{
loco::FeatureEncode *encode_layer = graph()->nodes()->create<loco::FeatureEncode>();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
(*encoder->perm())[loco::FeatureAxis::Count] = 0;
(*encoder->perm())[loco::FeatureAxis::Depth] = 1;
(*encoder->perm())[loco::FeatureAxis::Height] = 2;
{
loco::FeatureDecode *decode_layer = graph()->nodes()->create<loco::FeatureDecode>();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
(*decoder->perm())[loco::FeatureAxis::Count] = 0;
(*decoder->perm())[loco::FeatureAxis::Depth] = 1;
(*decoder->perm())[loco::FeatureAxis::Height] = 2;
auto bufs = (model->buffers());
auto *perm_buf =
- reinterpret_cast<const int32_t *>(bufs->Get(perm_tensor->buffer())->data()->data());
+ reinterpret_cast<const int32_t *>(bufs->Get(perm_tensor->buffer())->data()->data());
ASSERT_EQ(1, perm_buf[0]);
ASSERT_EQ(2, perm_buf[1]);
auto bufs = (model->buffers());
auto *perm_buf =
- reinterpret_cast<const int32_t *>(bufs->Get(perm_tensor->buffer())->data()->data());
+ reinterpret_cast<const int32_t *>(bufs->Get(perm_tensor->buffer())->data()->data());
ASSERT_EQ(3, perm_buf[0]);
ASSERT_EQ(0, perm_buf[1]);
ASSERT_EQ(1, perm_buf[2]);
//
// NOTE input and output 'feature' maps are in NHWC shape
bool same_padding_criterion_1 =
- (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
- (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
+ (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
+ (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
// For SAME padding, rear padding is equal to front padding or larger by at most 1
bool same_padding_criterion_2 =
- (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
- (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
+ (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
+ (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
if (same_padding_criterion_1 && same_padding_criterion_2)
return tflite::Padding_SAME;
}
}
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
void set_tensor_index(loco::Node *node, const TFLTensorIndex &tensor_id)
{
assert(node->annot<TFLTensorIndexAnnotation>() == nullptr);
- node->annot(stdex::make_unique<TFLTensorIndexAnnotation>(tensor_id));
+ node->annot(std::make_unique<TFLTensorIndexAnnotation>(tensor_id));
}
TFLTensorIndex get_tensor_index(loco::Node *node)
void visit(loco::ReLU *) final;
void visit(loco::ReLU6 *) final;
void visit(loco::Tanh *) final;
- void visit(loco::Push *) final { /* DO NOTHING */}
- void visit(loco::Pull *) final { /* DO NOTHING */}
+ void visit(loco::Push *) final
+ { /* DO NOTHING */
+ }
+ void visit(loco::Pull *) final
+ { /* DO NOTHING */
+ }
void visit(loco::FeatureEncode *) final;
void visit(loco::FeatureDecode *) final;
void visit(loco::FilterEncode *) final;
void visit(loco::DepthwiseFilterEncode *) final;
- void visit(loco::ConstGen *) final { /* skip, everything is done in exportOpDefinedTensors */}
+ void visit(loco::ConstGen *) final
+ { /* skip, everything is done in exportOpDefinedTensors */
+ }
void visit(loco::MaxPool2D *) final;
void visit(loco::AvgPool2D *) final;
void visit(loco::Conv2D *) final;
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
auto options =
- CreateFullyConnectedOptions(builder, to_tflite_actfunc(node->fusedActivationFunction()));
+ CreateFullyConnectedOptions(builder, to_tflite_actfunc(node->fusedActivationFunction()));
// Make FULLY_CONNECTED operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
auto options = CreateTransposeOptions(builder);
auto op_offset =
- CreateOperator(builder, op_idx, inputs, outputs,
- tflite::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
+ CreateOperator(builder, op_idx, inputs, outputs,
+ tflite::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
gd._operators.push_back(op_offset);
}
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(node->padding());
auto options =
- CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
+ CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
// Make TRANSPOSE_CONV operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
void OperationExporter::export_pool_2d(TFLPool2D *node, tflite::BuiltinOperator builtin_op)
{
EXO_ASSERT(builtin_op == tflite::BuiltinOperator_MAX_POOL_2D ||
- builtin_op == tflite::BuiltinOperator_AVERAGE_POOL_2D,
+ builtin_op == tflite::BuiltinOperator_AVERAGE_POOL_2D,
"should be maxpool or avgpool");
EXO_ASSERT(node->padding() != locoex::Padding::UNDEFINED, "Padding is not set");
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), node->window()->horizontal(),
- node->window()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreatePool2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical(),
+ node->window()->horizontal(), node->window()->vertical());
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
tflite::BuiltinOptions_Pool2DOptions, options.Union());
gd._operators.push_back(op_offset);
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), node->window()->horizontal(),
- node->window()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreatePool2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical(),
+ node->window()->horizontal(), node->window()->vertical());
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
tflite::BuiltinOptions_Pool2DOptions, options.Union());
gd._operators.push_back(op_offset);
std::vector<float> bias_vec_data(bias_vec_size); // initialized as zero vector
auto bias_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
auto bias_tensor_offset =
- CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+ CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
gd._tensors.push_back(bias_tensor_offset);
// Make input, output and options for operator
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreateConv2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreateConv2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical());
// Make CONV_2D operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
}
auto outshape_vec_offset = builder.CreateVector(
- reinterpret_cast<uint8_t *>(outshape_vec_data.data()), raw_outshape_vec_size);
+ reinterpret_cast<uint8_t *>(outshape_vec_data.data()), raw_outshape_vec_size);
auto outshape_buffer_offset = CreateBuffer(builder, outshape_vec_offset);
size_t raw_bias_vec_size = bias_vec_size * sizeof(int32_t);
std::vector<float> bias_vec_data(bias_vec_size);
auto bias_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
auto bias_tensor_offset =
- CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+ CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
gd._tensors.push_back(bias_tensor_offset);
std::vector<int32_t> inputs_vec{get_tensor_index(node->ifm()), get_tensor_index(node->ker()),
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
int32_t ifm_channel_size = ShapeInference::get(node->ifm())._dims[3];
// multiplier = bias_vec_size(output_size)/ifm_channel_size
auto options =
- CreateDepthwiseConv2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), bias_vec_size / ifm_channel_size);
+ CreateDepthwiseConv2DOptions(builder, padding, node->stride()->horizontal(),
+ node->stride()->vertical(), bias_vec_size / ifm_channel_size);
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
tflite::BuiltinOptions_DepthwiseConv2DOptions, options.Union());
size_t raw_axes_vec_size = axes_vec_size * sizeof(int32_t);
auto axes_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(axes_vec.data()), raw_axes_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(axes_vec.data()), raw_axes_vec_size);
auto axes_buffer_offset = CreateBuffer(builder, axes_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(axes_tensor_id));
auto axes_tensor_offset =
- CreateTensor(builder, axes_vec_shape_offset, TensorType_INT32, axes_buffer_id, name_offset);
+ CreateTensor(builder, axes_vec_shape_offset, TensorType_INT32, axes_buffer_id, name_offset);
gd._tensors.push_back(axes_tensor_offset);
std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), axes_tensor_id};
constexpr size_t raw_perm_vec_size = perm_vec_size * sizeof(int32_t);
auto perm_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(perm_vec_data.data()), raw_perm_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(perm_vec_data.data()), raw_perm_vec_size);
auto perm_buffer_offset = CreateBuffer(builder, perm_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(perm_tensor_id));
auto perm_tensor_offset =
- CreateTensor(builder, perm_vec_shape_offset, TensorType_INT32, perm_buffer_id, name_offset);
+ CreateTensor(builder, perm_vec_shape_offset, TensorType_INT32, perm_buffer_id, name_offset);
gd._tensors.push_back(perm_tensor_offset);
// Create permutation node
constexpr auto options_type = tflite::BuiltinOptions::BuiltinOptions_TransposeOptions;
auto transpose_offset =
- CreateOperator(builder, op_idx, inputs, outputs, options_type, options.Union());
+ CreateOperator(builder, op_idx, inputs, outputs, options_type, options.Union());
gd._operators.push_back(transpose_offset);
}
// but also by input.
auto input_shape_shape_vec_offset =
- builder.CreateVector(std::vector<int32_t>{(int32_t)new_shape_vec.size()});
+ builder.CreateVector(std::vector<int32_t>{(int32_t)new_shape_vec.size()});
size_t input_shape_vec_size = new_shape_vec.size() * sizeof(int32_t);
auto input_shape_input_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(new_shape_vec.data()), input_shape_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(new_shape_vec.data()), input_shape_vec_size);
auto input_shape_buffer_offset = CreateBuffer(builder, input_shape_input_vec_offset);
const auto input_shape_buffer_id = static_cast<uint32_t>(gd._buffers.size());
auto input_shape_tensor_id = static_cast<int32_t>(gd._tensors.size());
auto name_offset = builder.CreateString("t_" + std::to_string(input_shape_tensor_id));
auto input_shape_tensor_offset = CreateTensor(
- builder, input_shape_shape_vec_offset, TensorType_INT32, input_shape_buffer_id, name_offset);
+ builder, input_shape_shape_vec_offset, TensorType_INT32, input_shape_buffer_id, name_offset);
gd._tensors.push_back(input_shape_tensor_offset);
uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_RESHAPE);
auto padding_shape_vec_ptr = builder.CreateVector(std::vector<int32_t>{padding_vec_size, 2});
// create tensor
auto padding_tensor_ptr =
- CreateTensor(builder, padding_shape_vec_ptr, TensorType_INT32, padding_buffer_id);
+ CreateTensor(builder, padding_shape_vec_ptr, TensorType_INT32, padding_buffer_id);
// get tensor id
const auto padding_tensor_id = static_cast<int32_t>(gd._tensors.size());
bool visit(loco::FeatureEncode *node) final
{
auto encoder =
- loco::must_cast<loco::PermutingEncoder<loco::Domain::Feature> *>(node->encoder());
+ loco::must_cast<loco::PermutingEncoder<loco::Domain::Feature> *>(node->encoder());
auto perm = encoder->perm();
return isNHWC(perm);
}
bool visit(loco::FeatureDecode *node) final
{
auto decoder =
- loco::must_cast<loco::PermutingDecoder<loco::Domain::Feature> *>(node->decoder());
+ loco::must_cast<loco::PermutingDecoder<loco::Domain::Feature> *>(node->decoder());
auto perm = decoder->perm();
return isNHWC(perm);
}
#include <oops/InternalExn.h>
-#include <stdex/Memory.h>
-
#include <stdexcept>
#include <type_traits>
#include "Pass/TypeInferencePass.h"
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <gtest/gtest.h>
-using stdex::make_unique;
-
namespace
{
#include <loco.h>
-#include <stdex/Memory.h>
-
#include <cassert>
namespace exo
{
filterEncode = exo::make_filter_encode<exo::FilterLayout::HWIO>(pull); // from Tensorflow
filterDecode =
- exo::make_filter_decode<exo::FilterLayout::OHWI>(filterEncode); // to Tensorflow Lite
+ exo::make_filter_decode<exo::FilterLayout::OHWI>(filterEncode); // to Tensorflow Lite
complete(filterDecode);
}
};
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
TypeShapeReadyPhase()
{
// Type and Shape inference is prerequisite for run other test
- _phase.emplace_back(stdex::make_unique<::exo::TypeInferencePass>());
- _phase.emplace_back(stdex::make_unique<::exo::ShapeInferencePass>());
+ _phase.emplace_back(std::make_unique<::exo::TypeInferencePass>());
+ _phase.emplace_back(std::make_unique<::exo::ShapeInferencePass>());
}
- template <typename PassT> void add_pass() { _phase.emplace_back(stdex::make_unique<PassT>()); }
+ template <typename PassT> void add_pass() { _phase.emplace_back(std::make_unique<PassT>()); }
void run(loco::Graph *g)
{
add_library(foder INTERFACE)
target_include_directories(foder INTERFACE include)
+target_link_libraries(foder INTERFACE nncc_coverage)
* limitations under the License.
*/
+#ifndef __FODER_FILE_LOADER_H__
+#define __FODER_FILE_LOADER_H__
+
#include <fstream>
#include <vector>
};
} // namespace foder
+
+#endif // __FODER_FILE_LOADER_H__
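A hedged sketch of what an include-guarded whole-file loader like this typically looks like once assembled; the class name, load() body, and member names below are assumptions for illustration, not the verbatim foder::FileLoader.

#ifndef __EXAMPLE_FILE_LOADER_H__
#define __EXAMPLE_FILE_LOADER_H__

#include <cstddef>
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>

namespace example
{

// Reads a whole file into memory; names and behavior are illustrative
class FileLoader
{
public:
  explicit FileLoader(const std::string &path) : _path{path} {}

  std::vector<char> load(void) const
  {
    std::ifstream file(_path, std::ios::binary | std::ios::ate);
    if (!file.good())
      throw std::runtime_error("Failed to open " + _path);

    const std::streamoff size = file.tellg();
    std::vector<char> data(static_cast<std::size_t>(size));
    file.seekg(0, std::ios::beg);
    file.read(data.data(), size);
    return data;
  }

private:
  std::string _path;
};

} // namespace example

#endif // __EXAMPLE_FILE_LOADER_H__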
set_target_properties(hermes_std PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(hermes_std PUBLIC include)
target_link_libraries(hermes_std PUBLIC hermes)
-target_link_libraries(hermes_std PRIVATE stdex)
target_link_libraries(hermes_std PRIVATE pepper_strcast)
# Let's apply nncc common compile options
#
nnas_find_package(GTest REQUIRED)
GTest_AddTest(hermes_std_test ${TESTS})
-target_link_libraries(hermes_std_test stdex)
target_link_libraries(hermes_std_test hermes_std)
#include "hermes/ConsoleReporter.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <sstream>
#include <gtest/gtest.h>
ss << "Hello" << std::endl;
- m.text(stdex::make_unique<hermes::MessageText>(ss));
+ m.text(std::make_unique<hermes::MessageText>(ss));
}
hermes::ConsoleReporter r;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/EnvConfig.h"
+
+#include <hermes/core/SourceSetting.h>
+
+#include <gtest/gtest.h>
+
+#include <stdlib.h>
+
+namespace
+{
+
+class Logger final : public hermes::Source
+{
+public:
+ Logger() = default;
+ ~Logger() = default;
+};
+
+std::string env_name("TEST_CONFIG");
+
+} // namespace
+
+TEST(EnvConfigTest, constructor)
+{
+ hermes::EnvConfig<hermes::EnvFormat::BooleanNumber> ec(env_name);
+
+ SUCCEED();
+}
+
+TEST(EnvConfigTest, configure)
+{
+ Logger logger;
+ hermes::SourceSetting ss;
+ hermes::EnvConfig<hermes::EnvFormat::BooleanNumber> ec(env_name);
+
+ ec.configure(&logger, ss);
+
+ SUCCEED();
+}
+
+TEST(EnvConfigTest, configure_enabled)
+{
+ setenv(env_name.c_str(), "1", 0);
+
+ Logger logger;
+ hermes::SourceSetting ss;
+ hermes::EnvConfig<hermes::EnvFormat::BooleanNumber> ec(env_name);
+
+ ec.configure(&logger, ss);
+
+ SUCCEED();
+}
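For readers new to hermes, the tests above show the whole EnvConfig surface used here: hand it an environment-variable name and let it configure a logging Source. The sketch below restates the enabled path with the intent spelled out in comments; the interpretation of "1" as "enabled" is inferred from the configure_enabled test rather than from separate documentation, and the include forms mirror those used in this change. Illustrative only, not part of any file in this patch.

#include "hermes/EnvConfig.h"
#include "hermes/core/Source.h"

#include <hermes/core/SourceSetting.h>

#include <cstdlib>

namespace
{

// Any hermes::Source works; this mirrors the Logger defined in the test above.
struct DemoSource final : public hermes::Source
{
};

} // namespace

int main()
{
  // Assumption: under EnvFormat::BooleanNumber, "1" switches the source on.
  setenv("TEST_CONFIG", "1", /*overwrite=*/0);

  DemoSource source;
  hermes::SourceSetting setting;
  hermes::EnvConfig<hermes::EnvFormat::BooleanNumber> config("TEST_CONFIG");

  // Apply whatever TEST_CONFIG says to this source's setting.
  config.configure(&source, setting);

  return 0;
}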
add_library(hermes STATIC ${SOURCES})
set_target_properties(hermes PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(hermes PUBLIC include)
-target_link_libraries(hermes PRIVATE stdex)
# Let's apply nncc common compile options
#
# NOTE This will enable strict compilation (warnings as error).
add_executable(hermes_test ${TESTS})
target_link_libraries(hermes_test gtest_main)
-target_link_libraries(hermes_test stdex)
target_link_libraries(hermes_test hermes)
add_test(hermes_test hermes_test)
#include "hermes/core/MessageBuffer.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace hermes
{
{
// NOTE The current implementation is unsafe as it may throw an exception.
// TODO Find a better safe implementation.
- auto msg = stdex::make_unique<Message>();
+ auto msg = std::make_unique<Message>();
- msg->text(stdex::make_unique<MessageText>(_ss));
+ msg->text(std::make_unique<MessageText>(_ss));
_bus->post(std::move(msg));
}
#include "hermes/core/Source.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace hermes
std::unique_ptr<MessageBuffer> Source::buffer(const Severity &) const
{
// TODO Pass Severity
- return stdex::make_unique<MessageBuffer>(_bus);
+ return std::make_unique<MessageBuffer>(_bus);
}
} // namespace hermes
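The hunks above and below repeat one mechanical migration: stdex::make_unique becomes std::make_unique, the <stdex/Memory.h> include becomes <memory>, and the stdex link dependency drops out of the CMake files. A self-contained sketch of the pattern (illustrative only, not part of any file in this change):

#include <memory>

struct Message
{
  int value = 0;
};

int main()
{
  // Before this change: auto msg = stdex::make_unique<Message>();
  // After: the C++14 standard library provides the same helper directly.
  auto msg = std::make_unique<Message>();
  msg->value = 42;
  return msg->value == 42 ? 0 : 1;
}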
target_include_directories(loco PUBLIC include)
# TODO Remove dependencies on angkor library
target_link_libraries(loco PUBLIC angkor)
-target_link_libraries(loco PRIVATE stdex)
# Let's apply nncc common compile options
#
# NOTE This will enable strict compilation (warnings as error).
nnas_find_package(GTest REQUIRED)
GTest_AddTest(loco_test ${TESTS})
-target_link_libraries(loco_test stdex)
target_link_libraries(loco_test loco)
using Type = int16_t;
};
+template <> struct DataTypeImpl<DataType::U16>
+{
+ // Use C++ uint16_t type for unsigned 16bit integer
+ using Type = uint16_t;
+};
+
template <> struct DataTypeImpl<DataType::S32>
{
// Use C++ int32_t type for 32bit integer
using Type = int64_t;
};
+template <> struct DataTypeImpl<DataType::U64>
+{
+ // Use C++ uint64_t type for unsigned 64bit integer
+ using Type = uint64_t;
+};
+
template <> struct DataTypeImpl<DataType::FLOAT32>
{
// Use C++ float type for IEEE 32-bit floating-point numbers
using Type = float;
};
+template <> struct DataTypeImpl<DataType::FLOAT64>
+{
+ // Use C++ double type for IEEE 64-bit floating-point numbers
+ using Type = double;
+};
+
// NOTE DataTypeImpl for BOOL is subject to change
template <> struct DataTypeImpl<DataType::BOOL>
{
return sizeof(DataTypeImpl<DataType::U8>::Type);
case DataType::S16:
return sizeof(DataTypeImpl<DataType::S16>::Type);
+ case DataType::U16:
+ return sizeof(DataTypeImpl<DataType::U16>::Type);
case DataType::S32:
return sizeof(DataTypeImpl<DataType::S32>::Type);
case DataType::U32:
return sizeof(DataTypeImpl<DataType::U32>::Type);
case DataType::S64:
return sizeof(DataTypeImpl<DataType::S64>::Type);
+ case DataType::U64:
+ return sizeof(DataTypeImpl<DataType::U64>::Type);
case DataType::FLOAT32:
return sizeof(DataTypeImpl<DataType::FLOAT32>::Type);
+ case DataType::FLOAT64:
+ return sizeof(DataTypeImpl<DataType::FLOAT64>::Type);
case DataType::BOOL:
return sizeof(DataTypeImpl<DataType::BOOL>::Type);
default:
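The new U16, U64 and FLOAT64 entries extend the DataTypeImpl trait that maps each loco::DataType to its C++ representation, and the switch above sizes values through that mapping. A minimal sketch of how the trait and the size query can be exercised; the DataTypeTraits.h include path is assumed from loco's layout, and loco::size is the helper used later in this change by the luci eval driver:

#include <loco/IR/DataTypeTraits.h> // assumed location of DataTypeImpl and loco::size

#include <cstdint>
#include <type_traits>

static_assert(std::is_same<loco::DataTypeImpl<loco::DataType::U16>::Type, uint16_t>::value,
              "U16 maps to uint16_t");
static_assert(std::is_same<loco::DataTypeImpl<loco::DataType::FLOAT64>::Type, double>::value,
              "FLOAT64 maps to double");

int main()
{
  // With the new case added above, FLOAT64 reports the size of a double.
  return loco::size(loco::DataType::FLOAT64) == sizeof(double) ? 0 : 1;
}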
* @brief Make a value visible to user
*/
class Push /* to user */ final
- : public CanonicalNodeDef<CanonicalOpcode::Push, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::Push, FixedArity<1>::Mixin>
{
public:
Push() = default;
* @brief Create a value from user data
*/
class Pull /* from user */ final
- : public CanonicalNodeDef<CanonicalOpcode::Pull, FixedArity<0>::Mixin,
- With<NodeTrait::TensorShape>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::Pull, FixedArity<0>::Mixin,
+ With<NodeTrait::TensorShape>::Mixin>
{
public:
Pull() = default;
* }
*/
class ConstGen final
- : public CanonicalNodeDef<CanonicalOpcode::ConstGen, FixedArity<0>::Mixin,
- With<NodeTrait::DataType>::Mixin, With<NodeTrait::TensorShape>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::ConstGen, FixedArity<0>::Mixin,
+ With<NodeTrait::DataType>::Mixin, With<NodeTrait::TensorShape>::Mixin>
{
public:
ConstGen() = default;
* @brief Create a feature map from a tensor
*/
class FeatureEncode final
- : public CanonicalNodeDef<CanonicalOpcode::FeatureEncode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FeatureEncode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* @brief Create a tensor from a feature map
*/
class FeatureDecode final
- : public CanonicalNodeDef<CanonicalOpcode::FeatureDecode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FeatureDecode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* @brief Create a filter from a tensor
*/
class FilterEncode final
- : public CanonicalNodeDef<CanonicalOpcode::FilterEncode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FilterEncode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* @brief Create a tensor from a filter
*/
class FilterDecode final
- : public CanonicalNodeDef<CanonicalOpcode::FilterDecode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FilterDecode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* @brief Create a depthwise filter from a tensor
*/
class DepthwiseFilterEncode final
- : public CanonicalNodeDef<CanonicalOpcode::DepthwiseFilterEncode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::DepthwiseFilterEncode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* @brief Create a tensor from a depthwise filter
*/
class DepthwiseFilterDecode final
- : public CanonicalNodeDef<CanonicalOpcode::DepthwiseFilterDecode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::DepthwiseFilterDecode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
*/
template <>
class Reshape<ReshapeType::Fixed> final
- : public CanonicalNodeDef<CanonicalOpcode::FixedReshape, FixedArity<1>::Mixin,
- With<NodeTrait::TensorShape>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FixedReshape, FixedArity<1>::Mixin,
+ With<NodeTrait::TensorShape>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* concatenated along the given axis.
*/
class TensorConcat final
- : public CanonicalNodeDef<CanonicalOpcode::TensorConcat, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorConcat, FixedArity<2>::Mixin>
{
public:
Node *lhs(void) const { return at(0)->node(); }
* @brief Depthwise 2D Convolution
*/
class DepthwiseConv2D final
- : public CanonicalNodeDef<CanonicalOpcode::DepthwiseConv2D, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::DepthwiseConv2D, FixedArity<2>::Mixin>
{
public:
Node *ifm(void) const { return at(0)->node(); }
* @note All the reduce functions always keep dimensions
*/
class TensorReduce final
- : public CanonicalNodeDef<CanonicalOpcode::TensorReduce, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorReduce, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* With this, output shape is uniquely determined by all inputs and attributes.
*/
class TransposedConv2D final
- : public CanonicalNodeDef<CanonicalOpcode::TransposedConv2D, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TransposedConv2D, FixedArity<2>::Mixin>
{
public:
Node *ifm(void) const { return at(0)->node(); }
template <Domain D> class Softmax;
/**
-* @brief Computes softmax activations for Tensor domain
-*/
+ * @brief Computes softmax activations for Tensor domain
+ */
template <>
class Softmax<Domain::Tensor> final
- : public CanonicalNodeDef<CanonicalOpcode::TensorSoftmax, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorSoftmax, FixedArity<1>::Mixin>
{
public:
Softmax() = default;
*/
template <>
class BiasAdd<Domain::Tensor> final
- : public CanonicalNodeDef<CanonicalOpcode::TensorBiasAdd, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorBiasAdd, FixedArity<2>::Mixin>
{
public:
BiasAdd() = default;
*/
template <>
class BiasAdd<Domain::Feature> final
- : public CanonicalNodeDef<CanonicalOpcode::FeatureBiasAdd, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FeatureBiasAdd, FixedArity<2>::Mixin>
{
public:
BiasAdd() = default;
* [padding.front(0) + 1 + padding.back(0), padding.front(1) + 2 + padding.back(1)] = [4,9].
*/
class TensorConstantPad final
- : public CanonicalNodeDef<CanonicalOpcode::TensorConstantPad, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorConstantPad, FixedArity<2>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* @brief Elementwise Sqrt of input
*/
class EltwiseSqrt final
- : public CanonicalNodeDef<CanonicalOpcode::EltwiseSqrt, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::EltwiseSqrt, FixedArity<1>::Mixin>
{
public:
EltwiseSqrt() = default;
* TODO Explain the operation semantics
*/
class TensorBroadcast final
- : public CanonicalNodeDef<CanonicalOpcode::TensorBroadcast, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorBroadcast, FixedArity<1>::Mixin>
{
public:
TensorBroadcast() = default;
* MatrixEncode currently requires a rank-2 Tensor as its input.
*/
class MatrixEncode final
- : public CanonicalNodeDef<CanonicalOpcode::MatrixEncode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::MatrixEncode, FixedArity<1>::Mixin>
{
public:
MatrixEncode() = default;
* MatrixDecode currently requires a Matrix as its input.
*/
class MatrixDecode final
- : public CanonicalNodeDef<CanonicalOpcode::MatrixDecode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::MatrixDecode, FixedArity<1>::Mixin>
{
public:
MatrixDecode() = default;
* Input and output belong to tensor domain.
*/
class TensorTranspose final
- : public CanonicalNodeDef<CanonicalOpcode::TensorTranspose, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorTranspose, FixedArity<1>::Mixin>
{
public:
TensorTranspose() = default;
public:
Padding2D(uint32_t top, uint32_t bottom, uint32_t left, uint32_t right)
- : _top{top}, _bottom{bottom}, _left{left}, _right{right}
+ : _top{top}, _bottom{bottom}, _left{left}, _right{right}
{
// DO NOTHING
}
--- /dev/null
+require("angkor")
#include "loco/ADT/AnnotatedItem.h"
#include <gtest/gtest.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
{
static std::unique_ptr<DerivedAnnotation<N>> make(void)
{
- return stdex::make_unique<DerivedAnnotation<N>>();
+ return std::make_unique<DerivedAnnotation<N>>();
}
};
#include "loco/IR/Graph.h"
#include "loco/IR/Nodes.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <stdexcept>
CanonicalDialect::CanonicalDialect()
{
- service<GraphOutputIndexQueryService>(stdex::make_unique<GraphOutputIndexQueryServiceImpl>());
+ service<GraphOutputIndexQueryService>(std::make_unique<GraphOutputIndexQueryServiceImpl>());
}
Dialect *CanonicalDialect::get(void)
#include "loco/IR/Dialect.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
struct MockDialect final : public loco::Dialect
{
- MockDialect() { service<S1>(stdex::make_unique<S1>()); }
+ MockDialect() { service<S1>(std::make_unique<S1>()); }
};
MockDialect dialect;
#include "loco/IR/Graph.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
std::unique_ptr<loco::TensorShape> make_tensor_shape(std::initializer_list<loco::Dimension> dims)
{
- auto tensor_shape = stdex::make_unique<loco::TensorShape>();
+ auto tensor_shape = std::make_unique<loco::TensorShape>();
tensor_shape->rank(dims.size());
{
shape(make_tensor_shape(dims));
}
-GraphInput *Graph::InputContext::create(void)
-{
- return take(stdex::make_unique<GraphInput>(size()));
-}
+GraphInput *Graph::InputContext::create(void) { return take(std::make_unique<GraphInput>(size())); }
GraphOutput *Graph::OutputContext::create(void)
{
- return take(stdex::make_unique<GraphOutput>(size()));
+ return take(std::make_unique<GraphOutput>(size()));
}
std::set<loco::Node *> all_nodes(loco::Graph *g)
{
// temp node with multiple params for ctor. loco::CanonicalOpcode::ReLU is used for simplicity
class ParamCtorNode
- : public loco::CanonicalNodeDef<loco::CanonicalOpcode::ReLU, loco::FixedArity<0>::Mixin>
+ : public loco::CanonicalNodeDef<loco::CanonicalOpcode::ReLU, loco::FixedArity<0>::Mixin>
{
public:
ParamCtorNode(int i, float f)
#include "loco/IR/PermutingCodec.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <set>
#include <stdexcept>
std::unique_ptr<FeatureEncoder> PermutingEncoder<Domain::Feature>::clone(void) const
{
- return stdex::make_unique<PermutingEncoder<Domain::Feature>>(_perm);
+ return std::make_unique<PermutingEncoder<Domain::Feature>>(_perm);
}
bool PermutingEncoder<Domain::Feature>::valid(void) const { return ::valid(_perm); }
std::unique_ptr<FeatureDecoder> PermutingDecoder<Domain::Feature>::clone(void) const
{
- return stdex::make_unique<PermutingDecoder<Domain::Feature>>(_perm);
+ return std::make_unique<PermutingDecoder<Domain::Feature>>(_perm);
}
bool PermutingDecoder<Domain::Feature>::valid(void) const { return ::valid(_perm); }
#include <gtest/gtest.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <vector>
-using stdex::make_unique;
+using std::make_unique;
TEST(VerifierTest, valid_minimal)
{
for (uint32_t axis = 0; axis < out_shape.rank(); ++axis)
{
out_shape.dim(axis) =
- tensor_shape.dim(axis).value() + padding->front(axis) + padding->back(axis);
+ tensor_shape.dim(axis).value() + padding->front(axis) + padding->back(axis);
}
return loco::NodeShape{out_shape};
testcase.pull_node->shape({1, 8, 4, 3});
- testcase.encode_node->encoder(stdex::make_unique<PermutingEncoder<Domain::Feature>>(perm));
+ testcase.encode_node->encoder(std::make_unique<PermutingEncoder<Domain::Feature>>(perm));
testcase.avgpool2d_node->window()->vertical(2);
testcase.avgpool2d_node->window()->horizontal(2);
testcase.avgpool2d_node->stride()->vertical(2);
testcase.avgpool2d_node->stride()->horizontal(2);
- testcase.decode_node->decoder(stdex::make_unique<PermutingDecoder<Domain::Feature>>(perm));
+ testcase.decode_node->decoder(std::make_unique<PermutingDecoder<Domain::Feature>>(perm));
// Run Inference
loco::CanonicalShapeInferenceRule rule;
testcase.pull_node->shape({1, 8, 4, 3});
- testcase.encode_node->encoder(stdex::make_unique<PermutingEncoder<Domain::Feature>>(perm));
+ testcase.encode_node->encoder(std::make_unique<PermutingEncoder<Domain::Feature>>(perm));
testcase.maxpool2d_node->window()->vertical(2);
testcase.maxpool2d_node->window()->horizontal(2);
testcase.maxpool2d_node->stride()->vertical(2);
testcase.maxpool2d_node->stride()->horizontal(2);
- testcase.decode_node->decoder(stdex::make_unique<PermutingDecoder<Domain::Feature>>(perm));
+ testcase.decode_node->decoder(std::make_unique<PermutingDecoder<Domain::Feature>>(perm));
// Run Inference
loco::CanonicalShapeInferenceRule rule;
// loco-internal headers
#include "loco/IR/Graph.h"
-// repo-internal headers
-#include <stdex/Memory.h>
-
// C++ standard headers
+#include <memory>
#include <stack>
//
// "Layer" is in theory a subgraph builder.
template <typename Layer, typename... Args>
auto push(Args &&... args)
- -> decltype(static_cast<Layer *>(nullptr)->operator()(static_cast<Context *>(nullptr)))
+ -> decltype(static_cast<Layer *>(nullptr)->operator()(static_cast<Context *>(nullptr)))
{
Layer layer{std::forward<Args>(args)...};
return layer(ctx());
static inline std::unique_ptr<GraphBuilder> make_graph_builder(loco::Graph *g)
{
- return stdex::make_unique<GraphBuilder>(g);
+ return std::make_unique<GraphBuilder>(g);
}
// "InputLayer" creates both GraphInput and Pull node at once
ctx->stack()->push(pull_node);
- return stdex::make_unique<Return>(graph_input, pull_node);
+ return std::make_unique<Return>(graph_input, pull_node);
}
};
ctx->stack()->push(push_node);
- return stdex::make_unique<Return>(graph_output, push_node);
+ return std::make_unique<Return>(graph_output, push_node);
}
};
ctx->stack()->push(relu_node);
- return stdex::make_unique<Return>(relu_node);
+ return std::make_unique<Return>(relu_node);
}
};
ctx->stack()->push(const_node);
- return stdex::make_unique<Return>(const_node);
+ return std::make_unique<Return>(const_node);
}
};
Return *perm(const loco::Permutation<loco::Domain::Feature> &perm)
{
using namespace loco;
- _node->encoder(stdex::make_unique<PermutingEncoder<Domain::Feature>>(perm));
+ _node->encoder(std::make_unique<PermutingEncoder<Domain::Feature>>(perm));
return this;
}
ctx->stack()->push(encode_node);
- return stdex::make_unique<Return>(encode_node);
+ return std::make_unique<Return>(encode_node);
}
};
Return *perm(const loco::Permutation<loco::Domain::Feature> &perm)
{
using namespace loco;
- _node->decoder(stdex::make_unique<PermutingDecoder<Domain::Feature>>(perm));
+ _node->decoder(std::make_unique<PermutingDecoder<Domain::Feature>>(perm));
return this;
}
ctx->stack()->push(decode_node);
- return stdex::make_unique<Return>(decode_node);
+ return std::make_unique<Return>(decode_node);
}
};
public:
Return *perm(const loco::Permutation<loco::Domain::Filter> &perm)
{
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
encoder->perm(perm);
_node->encoder(std::move(encoder));
return this;
ctx->stack()->push(encode_node);
- return stdex::make_unique<Return>(encode_node);
+ return std::make_unique<Return>(encode_node);
}
};
Return *perm(const loco::Permutation<loco::Domain::DepthwiseFilter> &perm)
{
using namespace loco;
- _node->encoder(stdex::make_unique<PermutingEncoder<Domain::DepthwiseFilter>>(perm));
+ _node->encoder(std::make_unique<PermutingEncoder<Domain::DepthwiseFilter>>(perm));
return this;
}
ctx->stack()->push(encode_node);
- return stdex::make_unique<Return>(encode_node);
+ return std::make_unique<Return>(encode_node);
}
};
ctx->stack()->push(depthwiseconv2d_node);
- return stdex::make_unique<Return>(depthwiseconv2d_node);
+ return std::make_unique<Return>(depthwiseconv2d_node);
}
};
ctx->stack()->push(tr_conv2d_node);
- return stdex::make_unique<Return>(tr_conv2d_node);
+ return std::make_unique<Return>(tr_conv2d_node);
}
};
ctx->stack()->push(reshape_node);
- return stdex::make_unique<Return>(reshape_node);
+ return std::make_unique<Return>(reshape_node);
}
};
broadcast_node->input(ctx->stack()->pop());
ctx->stack()->push(broadcast_node);
- return stdex::make_unique<Return>(broadcast_node);
+ return std::make_unique<Return>(broadcast_node);
}
};
#include "GraphBuilder.h"
-#include <stdex/Memory.h>
-
enum class GraphCode
{
Identity,
const_node = graph_builder->push<ConstGenLayer>()->node();
filter_encode_node =
- graph_builder->push<DepthwiseFilterEncodeLayer>()->perm(filter_perm)->node();
+ graph_builder->push<DepthwiseFilterEncodeLayer>()->perm(filter_perm)->node();
depthwiseconv2d_node = graph_builder->push<DepthwiseConv2DLayer>()->node();
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(TestDialect<2, 3>::get(), &t23_rule)
- .bind(TestDialect<4, 5>::get(), &t45_rule);
+ .bind(TestDialect<2, 3>::get(), &t23_rule)
+ .bind(TestDialect<4, 5>::get(), &t45_rule);
loco::apply(&rules).to(g.get());
#include "loco/IR/Algorithm.h"
#include <cassert>
-
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
{
if (_rule->infer(node, shape))
{
- node->annot(stdex::make_unique<ShapeAnnotation>(shape));
+ node->annot(std::make_unique<ShapeAnnotation>(shape));
changed = true;
}
}
#include "loco/IR/Algorithm.h"
#include <cassert>
-
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
{
if (_rule->infer(node, dtype))
{
- node->annot(stdex::make_unique<DataTypeAnnotation>(dtype));
+ node->annot(std::make_unique<DataTypeAnnotation>(dtype));
changed = true;
}
}
loco::MultiDialectTypeInferenceRule rules;
rules.bind(TestDialect<loco::DataType::S8>::get(), &s8_rule)
- .bind(TestDialect<loco::DataType::U8>::get(), &u8_rule)
- .bind(loco::CanonicalDialect::get(), &canon_rule);
+ .bind(TestDialect<loco::DataType::U8>::get(), &u8_rule)
+ .bind(loco::CanonicalDialect::get(), &canon_rule);
loco::apply(&rules).to(g.get());
#include <gtest/gtest.h>
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
return HWIO;
}
-} // nemaspace
+} // namespace
#if 0
>>> MaxPool_Float_000 testcase
add_library(locoex_customop SHARED ${SOURCES})
target_include_directories(locoex_customop PUBLIC include)
target_link_libraries(locoex_customop PUBLIC loco)
-target_link_libraries(locoex_customop PRIVATE stdex locop pepper_str)
+target_link_libraries(locoex_customop PRIVATE locop pepper_str)
install(TARGETS locoex_customop DESTINATION lib)
if(NOT ENABLE_TEST)
nnas_find_package(GTest REQUIRED)
GTest_AddTest(locoex_customop_test ${TESTS})
-target_link_libraries(locoex_customop_test loco locoex_customop stdex)
+target_link_libraries(locoex_customop_test loco locoex_customop)
require("loco")
-require("stdex")
require("locop")
require("pepper-str")
#define INSTANTIATE(AT) \
template const typename AttrTypeTrait<AT>::Type *COpCall::attr<AT>(const std::string &attr_name) \
- const;
+ const;
INSTANTIATE(COpAttrType::Float)
INSTANTIATE(COpAttrType::Int)
#include <loco/IR/Graph.h>
#include <loco/IR/Nodes.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
custom->input(0, inp);
custom->input(1, inp);
- custom->attr(int_attr, stdex::make_unique<COpAttrInt>(int_val));
- custom->attr(float_attr, stdex::make_unique<COpAttrFloat>(float_val));
+ custom->attr(int_attr, std::make_unique<COpAttrInt>(int_val));
+ custom->attr(float_attr, std::make_unique<COpAttrFloat>(float_val));
}
// access custom op input
public:
BinaryInputNode() : TestNode(2) {}
};
-}
+} // namespace
TEST(CustomOpTest, VariadicArityNode_arity_0)
{
target_include_directories(locomotiv PRIVATE src)
target_link_libraries(locomotiv PUBLIC loco)
target_link_libraries(locomotiv PUBLIC angkor)
-target_link_libraries(locomotiv PRIVATE stdex)
# Let's apply nncc common compile options
#
# NOTE This will enable strict compilation (warnings as error).
* @warn This approach may fail in case of graph with control flow
*/
Session(loco::Graph *g, const std::vector<loco::Node *> &custom_outputs)
- : _graph(g), _outputs(custom_outputs)
+ : _graph(g), _outputs(custom_outputs)
{
// DO NOTHING
}
require("angkor")
-require("stdex")
const uint32_t pad_right = avgpool2d->pad()->right();
const uint32_t output_height =
- compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
+ compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
const uint32_t output_width =
- compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
+ compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
// prepare output buffer
Shape output_shape{batches, output_height, output_width, depth};
ASSERT_TRUE(*(avgpool2d_data->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
validate(input_data && bias_data, "Input not ready");
validate(locomotiv::annot_domain(bias_add->value()) == loco::Domain::Tensor &&
- locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
+ locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
"Wrong input domain");
std::unique_ptr<NodeData> bias_add_data = calc(input_data, bias_data, bias_add->axis());
validate(input_data && bias_data, "Input not ready");
validate(locomotiv::annot_domain(bias_add->value()) == loco::Domain::Feature &&
- locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
+ locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
"Wrong input domain");
std::unique_ptr<NodeData> bias_add_data = calc(input_data, bias_data, 3);
const uint32_t pad_right = conv2d->pad()->right();
const uint32_t output_height =
- compute_out_size(input_height + pad_top + pad_bottom, filter_height, stride_height);
+ compute_out_size(input_height + pad_top + pad_bottom, filter_height, stride_height);
const uint32_t output_width =
- compute_out_size(input_width + pad_left + pad_right, filter_width, stride_width);
+ compute_out_size(input_width + pad_left + pad_right, filter_width, stride_width);
const uint32_t batches = input_shape.dim(0);
const uint32_t input_depth = input_shape.dim(3);
((unsigned)in_y < input_height))
{
auto input_value =
- input_buf->at(Index({batch, (unsigned)in_y, (unsigned)in_x, in_channel}));
+ input_buf->at(Index({batch, (unsigned)in_y, (unsigned)in_x, in_channel}));
auto filter_value =
- filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
+ filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
total += (input_value * filter_value);
}
}
ASSERT_TRUE(*(conv2d_result->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
const uint32_t pad_right = dw_conv2d->pad()->right();
const uint32_t ofm_height =
- compute_out_size(ifm_height, pad_top + pad_bottom, ker_height, stride_height);
+ compute_out_size(ifm_height, pad_top + pad_bottom, ker_height, stride_height);
const uint32_t ofm_width =
- compute_out_size(ifm_width, pad_left + pad_right, ker_width, stride_width);
+ compute_out_size(ifm_width, pad_left + pad_right, ker_width, stride_width);
const uint32_t batches = ifm_shape.dim(0);
const uint32_t ifm_depth = ifm_shape.dim(3);
ASSERT_TRUE(*(dw_conv2d_result->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
// Make HWCM (i.e. height, width, depth, multiplier) buffer from DepthwiseFilterShape
Buffer<T> node_buf = make_buffer<T, LexicalLayout>(
- Shape{node_shape.height().value(), node_shape.width().value(), node_shape.depth().value(),
- node_shape.multiplier().value()});
+ Shape{node_shape.height().value(), node_shape.width().value(), node_shape.depth().value(),
+ node_shape.multiplier().value()});
// Copy buffer in an order arranged by encoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
// Encoder to correctly read input tensor as MHWC
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>(
- new loco::PermutingEncoder<loco::Domain::DepthwiseFilter>);
+ new loco::PermutingEncoder<loco::Domain::DepthwiseFilter>);
encoder->perm()->axis(loco::DepthwiseFilterAxis::Multiplier) = 0;
encoder->perm()->axis(loco::DepthwiseFilterAxis::Height) = 1;
encoder->perm()->axis(loco::DepthwiseFilterAxis::Width) = 2;
const loco::Permutation<loco::Domain::Feature> &perm)
{
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Feature>>(
- new loco::PermutingEncoder<loco::Domain::Feature>);
+ new loco::PermutingEncoder<loco::Domain::Feature>);
encoder->perm(perm);
const loco::Permutation<loco::Domain::Feature> &perm)
{
auto decoder = std::unique_ptr<loco::PermutingDecoder<loco::Domain::Feature>>(
- new loco::PermutingDecoder<loco::Domain::Feature>);
+ new loco::PermutingDecoder<loco::Domain::Feature>);
decoder->perm(perm);
// Make tensor buffer from TensorShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value(),
- node_shape.dim(2).value(), node_shape.dim(3).value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value(),
+ node_shape.dim(2).value(), node_shape.dim(3).value()});
// Copy buffer in an order arranged by decoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
// Make NHWC buffer from FeatureShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(),
- node_shape.width().value(), node_shape.depth().value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(),
+ node_shape.width().value(), node_shape.depth().value()});
// Copy buffer in an order arranged by encoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
// Make NHWC buffer from FilterShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(),
- node_shape.width().value(), node_shape.depth().value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(),
+ node_shape.width().value(), node_shape.depth().value()});
// Copy buffer in an order arranged by encoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
// Encoder to correctly read input tensor as NCHW
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Filter>>(
- new loco::PermutingEncoder<loco::Domain::Filter>);
+ new loco::PermutingEncoder<loco::Domain::Filter>);
encoder->perm()->axis(loco::FilterAxis::Count) = 0;
encoder->perm()->axis(loco::FilterAxis::Depth) = 1;
encoder->perm()->axis(loco::FilterAxis::Height) = 2;
// Encoder to correctly read input tensor as CHNW
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Filter>>(
- new loco::PermutingEncoder<loco::Domain::Filter>);
+ new loco::PermutingEncoder<loco::Domain::Filter>);
encoder->perm()->axis(loco::FilterAxis::Depth) = 0;
encoder->perm()->axis(loco::FilterAxis::Height) = 1;
encoder->perm()->axis(loco::FilterAxis::Count) = 2;
const loco::Permutation<loco::Domain::Matrix> &perm)
{
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Matrix>>(
- new loco::PermutingEncoder<loco::Domain::Matrix>);
+ new loco::PermutingEncoder<loco::Domain::Matrix>);
encoder->perm(perm);
const loco::Permutation<loco::Domain::Matrix> &perm)
{
auto decoder = std::unique_ptr<loco::PermutingDecoder<loco::Domain::Matrix>>(
- new loco::PermutingDecoder<loco::Domain::Matrix>);
+ new loco::PermutingDecoder<loco::Domain::Matrix>);
decoder->perm(perm);
// Make tensor buffer from TensorShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value()});
// Copy buffer in an order arranged by decoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
// Make HW buffer from MatrixShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.height().value(), node_shape.width().value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.height().value(), node_shape.width().value()});
// Copy buffer in an order arranged by encoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
const uint32_t pad_right = maxpool2d->pad()->right();
const uint32_t output_height =
- compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
+ compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
const uint32_t output_width =
- compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
+ compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
// prepare output buffer
Shape output_shape{batches, output_height, output_width, depth};
ASSERT_TRUE(*(maxpool2d_data->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
validate(lhs_data->dtype() == rhs_data->dtype(), "lhs and rhs of Concat should have same dtype");
validate(annot_domain(tensor_concat->lhs()) == loco::Domain::Tensor &&
- annot_domain(tensor_concat->rhs()) == loco::Domain::Tensor,
+ annot_domain(tensor_concat->rhs()) == loco::Domain::Tensor,
"Some ingredients of TensorConcat is not Tensor");
// Calculate output shape
locomotiv::validate(input_shape.rank() == 4, "ifm rank must be 4");
locomotiv::validate(filter_shape.rank() == 4, "filter rank must be 4");
locomotiv::validate(input_shape.dim(3) /* depth of input */ ==
- filter_shape.dim(3) /* depth of filter */,
+ filter_shape.dim(3) /* depth of filter */,
"channel value mismatch");
const uint32_t input_height = input_shape.dim(1);
// TODO Support dilations
const uint32_t output_height =
- compute_transposed_out_size(input_height, pad_top + pad_bottom, filter_height, stride_height);
+ compute_transposed_out_size(input_height, pad_top + pad_bottom, filter_height, stride_height);
const uint32_t output_width =
- compute_transposed_out_size(input_width, pad_left + pad_right, filter_width, stride_width);
+ compute_transposed_out_size(input_width, pad_left + pad_right, filter_width, stride_width);
const uint32_t batches = input_shape.dim(0);
const uint32_t input_depth = input_shape.dim(3);
{
auto input_value = input_buf->at(Index({batch, in_y, in_x, in_channel}));
auto filter_value =
- filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
+ filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
output_buf.at(Index({batch, (unsigned)out_y, (unsigned)out_x, out_channel})) +=
- input_value * filter_value;
+ input_value * filter_value;
}
}
}
ASSERT_TRUE(*(conv2d_result->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
#include "NodeDataImpl.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
void annot_data(loco::Node *node, std::unique_ptr<NodeData> &&data)
{
- node->annot(stdex::make_unique<NodeDataAnnotation>(std::move(data)));
+ node->annot(std::make_unique<NodeDataAnnotation>(std::move(data)));
}
const NodeData *annot_data(const loco::Node *node)
return dynamic_cast<Derived *>(node);
}
-// clang-format off
+ // clang-format off
/**
* @brief Calculate for one specified node and update its result as NodeData.
* Abort program when its ingredients are not ready or not supported.
#include "UserData.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
void user_data(loco::Node *node, std::unique_ptr<NodeData> &&data)
{
- node->annot(stdex::make_unique<UserDataAnnotation>(std::move(data)));
+ node->annot(std::make_unique<UserDataAnnotation>(std::move(data)));
}
void erase_user_data(loco::Node *node) { node->annot<UserDataAnnotation>(nullptr); }
target_link_libraries(locop PRIVATE nncc_common)
target_link_libraries(locop PUBLIC nncc_coverage)
target_link_libraries(locop PRIVATE pp)
-target_link_libraries(locop PRIVATE stdex)
if(NOT ENABLE_TEST)
return()
nnas_find_package(GTest REQUIRED)
GTest_AddTest(locop_test ${TESTS})
-target_link_libraries(locop_test stdex)
target_link_libraries(locop_test locop)
#include <pp/Format.h>
-#include <stdex/Memory.h>
-
#include <map>
#include <set>
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
push->from(pull);
- auto res = stdex::make_unique<Bundle<PullPush>>();
+ auto res = std::make_unique<Bundle<PullPush>>();
res->g = std::move(g);
res->pull = pull;
#include <pp/Format.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <set>
else
{
// Use Built-in NodeSummaryBuilder otherwise
- node_summary_builder = stdex::make_unique<GenericNodeSummaryBuilder>(&symbols);
+ node_summary_builder = std::make_unique<GenericNodeSummaryBuilder>(&symbols);
}
// Print Graph Input(s)
#include "locop/FormattedGraph.h"
#include "ExampleGraph.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
auto bundle = make_bundle<PullPush>();
auto g = bundle->graph();
{
- bundle->push->annot(stdex::make_unique<MyAnnotation>());
+ bundle->push->annot(std::make_unique<MyAnnotation>());
}
struct MyBuilder final : public locop::NodeSummaryBuilder
{
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *) const final
{
- return stdex::make_unique<MyBuilder>();
+ return std::make_unique<MyBuilder>();
}
};
- std::cout << locop::fmt<locop::LinearV1>(g).with(stdex::make_unique<MyFactory>()) << std::endl;
+ std::cout << locop::fmt<locop::LinearV1>(g).with(std::make_unique<MyFactory>()) << std::endl;
// TODO Check whether MyBuilder actually sees all the nodes in a graph
SUCCEED();
{
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tbl) const final
{
- return stdex::make_unique<CompositeBuilder>(tbl);
+ return std::make_unique<CompositeBuilder>(tbl);
}
};
- std::cout << locop::fmt<locop::LinearV1>(g).with(stdex::make_unique<MyFactory>()) << std::endl;
+ std::cout << locop::fmt<locop::LinearV1>(g).with(std::make_unique<MyFactory>()) << std::endl;
// TODO Check whether MyBuilder actually sees all the nodes in a graph
SUCCEED();
return os;
}
-} // namespace
+} // namespace loco
namespace locop
{
#include "locop/FormattedTensorShape.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
TEST(FormattedTensorShapeTest, BracketFormat)
{
- auto tensor_shape = stdex::make_unique<loco::TensorShape>();
+ auto tensor_shape = std::make_unique<loco::TensorShape>();
tensor_shape->rank(2);
tensor_shape->dim(0) = 4;
+ tensor_shape->dim(1) = 8;
std::cout << fmt<TensorShapeFormat::Bracket>(tensor_shape.get()) << std::endl;
SUCCEED();
}
+
+TEST(FormattedTensorShapeTest, PlainFormat)
+{
+ auto tensor_shape = std::make_unique<loco::TensorShape>();
+
+ tensor_shape->rank(2);
+ tensor_shape->dim(0) = 4;
+ tensor_shape->dim(1) = 8;
+
+ std::cout << fmt<TensorShapeFormat::Plain>(tensor_shape.get()) << std::endl;
+
+ SUCCEED();
+}
#include "locop/GenericNodeSummaryBuilder.h"
#include "locop/FormattedGraph.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <stdexcept>
#include <gtest/gtest.h>
{
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tbl) const final
{
- return stdex::make_unique<locop::GenericNodeSummaryBuilder>(tbl);
+ return std::make_unique<locop::GenericNodeSummaryBuilder>(tbl);
}
};
g->nodes()->create<MockNode>();
- std::cout << locop::fmt<locop::LinearV1>(g).with(stdex::make_unique<MockFactory>()) << std::endl;
+ std::cout << locop::fmt<locop::LinearV1>(g).with(std::make_unique<MockFactory>()) << std::endl;
SUCCEED();
}
#include "locop/NodeSummary.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace locop
return *_name;
}
-void NodeDesc::opname(const std::string &v) { _name = stdex::make_unique<std::string>(v); }
+void NodeDesc::opname(const std::string &v) { _name = std::make_unique<std::string>(v); }
-} // namespace loco
+} // namespace locop
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/Phase.h>
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+struct Bumblebee final : public logo::Pass
+{
+ const char *name(void) const final { return "Bee"; }
+ bool run(loco::Graph *) final { return false; }
+};
+
+} // namespace
+
+TEST(LogoPhaseSaturateTests, simple)
+{
+ loco::Graph g;
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{&g};
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<Bumblebee>());
+ phase_runner.run(phase);
+
+ SUCCEED();
+}
+
+TEST(LogoPhaseRestartTests, simple)
+{
+ loco::Graph g;
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{&g};
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<Bumblebee>());
+ phase_runner.run(phase);
+
+ SUCCEED();
+}
target_link_libraries(logo PUBLIC loco)
target_link_libraries(logo PUBLIC logo_core)
target_link_libraries(logo PRIVATE locomotiv)
-target_link_libraries(logo PRIVATE stdex)
if(NOT ENABLE_TEST)
return()
GTest_AddTest(logo_test ${TESTS})
target_include_directories(logo_test PRIVATE src)
target_link_libraries(logo_test logo)
-target_link_libraries(logo_test stdex)
require("loco")
require("logo-core")
require("locomotiv")
-require("stdex")
#include <loco.h>
#include <loco/IR/CanonicalDialect.h>
-#include <stdex/Memory.h>
-
#include <locomotiv/Session.h>
#include <cassert>
bool skip(const loco::Node *node)
{
static std::set<uint32_t> skip_op = {
- // TODO Current implementation works for 'Tensor' domain only. Support other domains such as
- // `Feature`, `Filter`, `Bias`, etc.
- static_cast<uint32_t>(loco::CanonicalOpcode::FilterEncode),
- static_cast<uint32_t>(loco::CanonicalOpcode::FeatureEncode),
- static_cast<uint32_t>(loco::CanonicalOpcode::BiasEncode),
- static_cast<uint32_t>(loco::CanonicalOpcode::DepthwiseFilterEncode),
-
- // We don't perform constant folding for Push
- static_cast<uint32_t>(loco::CanonicalOpcode::Push),
-
- // TensorBroadcast is a good hint for optimization
- // TODO Let this option be controlled by driver using logo
- static_cast<uint32_t>(loco::CanonicalOpcode::TensorBroadcast),
+ // TODO Current implementation works for 'Tensor' domain only. Support other domains such as
+ // `Feature`, `Filter`, `Bias`, etc.
+ static_cast<uint32_t>(loco::CanonicalOpcode::FilterEncode),
+ static_cast<uint32_t>(loco::CanonicalOpcode::FeatureEncode),
+ static_cast<uint32_t>(loco::CanonicalOpcode::BiasEncode),
+ static_cast<uint32_t>(loco::CanonicalOpcode::DepthwiseFilterEncode),
+
+ // We don't perform constant folding for Push
+ static_cast<uint32_t>(loco::CanonicalOpcode::Push),
+
+ // TensorBroadcast is a good hint for optimization
+ // TODO Let this option be controlled by driver using logo
+ static_cast<uint32_t>(loco::CanonicalOpcode::TensorBroadcast),
};
if (node->dialect() == loco::CanonicalDialect::get())
using namespace logo::test;
+TEST(ConstantFoldingTest, name)
+{
+ logo::ConstantFoldingPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ConstantFoldingTest, run_NEG)
+{
+ loco::Graph g;
+ logo::ConstantFoldingPass pass;
+
+ ASSERT_FALSE(pass.run(&g));
+}
+
namespace
{
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_EMPTY_TEST_GRAPH_H__
+#define __LOGO_EMPTY_TEST_GRAPH_H__
+
+#include <loco.h>
+
+namespace logo
+{
+
+void create_empty_test_net(loco::Graph *graph);
+
+} // namespace logo
+
+#endif // __LOGO_EMPTY_TEST_GRAPH_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+#include <cassert>
+
+namespace logo
+{
+
+void create_empty_test_net(loco::Graph *graph)
+{
+ assert(graph);
+
+ auto const_node = graph->nodes()->create<loco::ConstGen>();
+ {
+ const_node->dtype(loco::DataType::FLOAT32);
+ const_node->rank(1);
+ const_node->dim(0) = 1;
+ const_node->size<loco::DataType::FLOAT32>(1);
+ const_node->at<loco::DataType::FLOAT32>(0) = 1.0f;
+ }
+
+ auto push_node = graph->nodes()->create<loco::Push>();
+ {
+ push_node->from(const_node);
+ }
+
+ auto graph_output = graph->outputs()->create();
+ {
+ graph_output->name("output");
+ graph_output->dtype(loco::DataType::FLOAT32);
+ loco::link(graph_output, push_node);
+ }
+}
+
+} // namespace logo
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/RemoveDeadNodePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(RemoveDeadNodePassTest, name)
+{
+ logo::RemoveDeadNodePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveDeadNodePassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::RemoveDeadNodePass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/RemoveDeadNodeWithQueryPass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(RemoveDeadNodeWithQueryPassTest, name)
+{
+ logo::RemoveDeadNodeWithQueryPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveDeadNodeWithQueryPassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::RemoveDeadNodeWithQueryPass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/RemoveForwardNodePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(RemoveForwardNodePassTest, name)
+{
+ logo::RemoveForwardNodePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveForwardNodePassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::RemoveForwardNodePass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ReorderDecodePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(ReorderDecodePassTest, TensorBiasAdd_name)
+{
+ logo::ReorderDecodePass<loco::TensorBiasAdd> pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ReorderDecodePassTest, ReLU_name)
+{
+ logo::ReorderDecodePass<loco::ReLU> pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ReorderDecodePassTest, TensorBiasAdd_run_NEG)
+{
+ loco::Graph g;
+ logo::ReorderDecodePass<loco::TensorBiasAdd> pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
+
+TEST(ReorderDecodePassTest, ReLU_run_NEG)
+{
+ loco::Graph g;
+ logo::ReorderDecodePass<loco::ReLU> pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ResolveDuplicateReshapePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(ResolveDuplicateReshapePassTest, name)
+{
+ logo::ResolveDuplicateReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ResolveDuplicateReshapePassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::ResolveDuplicateReshapePass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ResolveRedundantReshapePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(ResolveRedundantReshapePassTest, name)
+{
+ logo::ResolveRedundantReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ResolveRedundantReshapePassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::ResolveRedundantReshapePass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
#include <loco/IR/CanonicalDialect.h>
#include <loco/IR/CanonicalNode.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <set>
#include <vector>
#include <cassert>
perm_vec[to] = from;
}
- transposeCandidates.insert(stdex::make_unique<TransposeCtx>(
- encode_node, decode_node, encode_node->input(), perm_vec));
+ transposeCandidates.insert(
+ std::make_unique<TransposeCtx>(encode_node, decode_node, encode_node->input(), perm_vec));
}
}
perm_vec[to] = from;
}
- transposeCandidates.insert(stdex::make_unique<TransposeCtx>(
- encode_node, decode_node, encode_node->input(), perm_vec));
+ transposeCandidates.insert(
+ std::make_unique<TransposeCtx>(encode_node, decode_node, encode_node->input(), perm_vec));
}
}
perm_vec[to] = from;
}
- transposeCandidates.insert(stdex::make_unique<TransposeCtx>(
- encode_node, decode_node, encode_node->input(), perm_vec));
+ transposeCandidates.insert(
+ std::make_unique<TransposeCtx>(encode_node, decode_node, encode_node->input(), perm_vec));
}
}
TransposeCtx(loco::Node *first, loco::Node *last, loco::Node *input,
std::vector<loco::TensorAxis> perm)
- : first_node(first), last_node(last), input_node(input), perm_vec(perm)
+ : first_node(first), last_node(last), input_node(input), perm_vec(perm)
{ /* empty */
}
};
#include "TestHelper.h"
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
+TEST(SimplifyDomainConversionPassTest, name)
+{
+ logo::SimplifyDomainConversionPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(SimplifyDomainConversionPassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::SimplifyDomainConversionPass pass;
+
+ ASSERT_FALSE(pass.run(&g));
+}
+
namespace
{
{
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
decoder->perm(perm<T>());
{
loco::Graph *g = input_for_encode->graph();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
encoder->perm(perm<T>());
--- /dev/null
+set(SRCS_EVAL_TESTER
+ src/EvalDriver.cpp
+ )
+
+add_executable(luci_eval_driver ${SRCS_EVAL_TESTER})
+target_link_libraries(luci_eval_driver PRIVATE oops)
+target_link_libraries(luci_eval_driver PRIVATE loco)
+target_link_libraries(luci_eval_driver PRIVATE luci_import)
+target_link_libraries(luci_eval_driver PRIVATE luci_export)
+target_link_libraries(luci_eval_driver PRIVATE luci_lang)
+target_link_libraries(luci_eval_driver PRIVATE luci_interpreter)
+target_link_libraries(luci_eval_driver PRIVATE safemain)
--- /dev/null
+require("oops")
+require("loco")
+require("luci")
+require("luci-interpreter")
+require("safemain")
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Importer.h>
+#include <luci_interpreter/Interpreter.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <vector>
+#include <string>
+
+namespace
+{
+
+void readDataFromFile(const std::string &filename, char *data, size_t data_size)
+{
+ std::ifstream fs(filename, std::ifstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.read(data, data_size).fail())
+ throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+}
+
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
+{
+ std::ofstream fs(filename, std::ofstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.write(data, data_size).fail())
+ {
+ throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+ }
+}
+
+std::unique_ptr<luci::Module> importModel(const std::string &filename)
+{
+ std::ifstream fs(filename, std::ifstream::binary);
+ if (fs.fail())
+ {
+ throw std::runtime_error("Cannot open model file \"" + filename + "\".\n");
+ }
+ std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
+ std::istreambuf_iterator<char>());
+ return luci::Importer().importModule(circle::GetModel(model_data.data()));
+}
+
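+// Computes the tensor's data size in bytes: the element size of its dtype
+// (via loco::size) multiplied by every dimension.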
+template <typename NodeT> size_t getTensorSize(const NodeT *node)
+{
+ uint32_t tensor_size = loco::size(node->dtype());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ tensor_size *= node->dim(i).value();
+ return tensor_size;
+}
+
+} // namespace
+
+/*
+ * @brief EvalDriver main
+ *
+ * Driver for testing luci-interpreter
+ *
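+ * Example invocation (input/output file names are illustrative):
+ *   luci_eval_driver Add.circle 1 Add.circle.input Add.circle.output
+ *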
+ */
+int entry(int argc, char **argv)
+{
+ if (argc != 5)
+ {
+ std::cerr
+ << "Usage: " << argv[0]
+ << " <path/to/circle/model> <num_inputs> <path/to/input/prefix> <path/to/output/file>\n";
+ return EXIT_FAILURE;
+ }
+
+ const char *filename = argv[1];
+ const int32_t num_inputs = atoi(argv[2]);
+ const char *input_prefix = argv[3];
+ const char *output_file = argv[4];
+
+ // Load model from the file
+ std::unique_ptr<luci::Module> module = importModel(filename);
+ if (module == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ // Create interpreter.
+ luci_interpreter::Interpreter interpreter(module.get());
+
+ // Set input.
+ // Data for n'th input is read from ${input_prefix}n
+ // (ex: Add.circle.input0, Add.circle.input1 ..)
+ const auto input_nodes = loco::input_nodes(module->graph());
+ assert(num_inputs == input_nodes.size());
+ for (int32_t i = 0; i < num_inputs; i++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
+ std::vector<char> input_data(getTensorSize(input_node));
+ readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data.data(),
+ input_data.size());
+ interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+ }
+
+ // Do inference.
+ interpreter.interpret();
+
+ // Get output.
+ const auto output_nodes = loco::output_nodes(module->graph());
+ for (int i = 0; i < module->graph()->outputs()->size(); i++)
+ {
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ std::vector<char> output_data(getTensorSize(output_node));
+ interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+
+    // Output data is written to ${output_file}i
+    // (ex: Add.circle.output0)
+    // Output shape is written to ${output_file}i.shape
+    // (ex: Add.circle.output0.shape)
+ writeDataToFile(std::string(output_file) + std::to_string(i), output_data.data(),
+ output_data.size());
+    // In case the output tensor is a scalar value:
+    // an output tensor with rank 0 is treated as a scalar with shape (1)
+ if (output_node->rank() == 0)
+ {
+ writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", "1", 1);
+ }
+ else
+ {
+ auto shape_str = std::to_string(output_node->dim(0).value());
+ for (int j = 1; j < output_node->rank(); j++)
+ {
+ shape_str += ",";
+ shape_str += std::to_string(output_node->dim(j).value());
+ }
+ writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", shape_str.c_str(),
+ shape_str.size());
+ }
+ }
+ return EXIT_SUCCESS;
+}
public:
EventNotifierImpl(const RuntimeToIR &runtime_to_ir,
const std::vector<ExecutionObserver *> &observers)
- : _runtime_to_ir(runtime_to_ir), _observers(observers)
+ : _runtime_to_ir(runtime_to_ir), _observers(observers)
{
}
{
protected:
Kernel(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs)
- : _inputs(std::move(inputs)), _outputs(std::move(outputs))
+ : _inputs(std::move(inputs)), _outputs(std::move(outputs))
{
}
protected:
KernelWithParams(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
const Params ¶ms)
- : Kernel(std::move(inputs), std::move(outputs)), _params(params)
+ : Kernel(std::move(inputs), std::move(outputs)), _params(params)
{
}
struct ConcatenationParams
{
int axis;
+ Activation activation;
};
struct Conv2DParams
Activation activation;
};
+struct PackParams
+{
+ int32_t values_count;
+ int32_t axis;
+};
+
struct Pool2DParams
{
Padding padding;
}
RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module)
- : _owning_module(owning_module), _tensor_alloc_plan(std::make_unique<TensorAllocPlan>())
+ : _owning_module(owning_module), _tensor_alloc_plan(std::make_unique<TensorAllocPlan>())
{
}
Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantization,
std::string name)
- : _element_type(element_type), _shape(std::move(shape)), _quantization(std::move(quantization)),
- _name(std::move(name)), _data_allocated(false)
+ : _element_type(element_type), _shape(std::move(shape)), _quantization(std::move(quantization)),
+ _name(std::move(name)), _data_allocated(false)
{
}
{
Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams ¶ms)
- : KernelWithParams<AddParams>({input1, input2}, {output}, params)
+ : KernelWithParams<AddParams>({input1, input2}, {output}, params)
{
}
params.float_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), ¶ms);
+ getTensorShape(input1()), getTensorShape(input2()), ¶ms);
if (need_broadcast)
{
tflite::reference_ops::BroadcastAdd4DSlow(
- params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
- getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
}
else
{
params.quantized_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), ¶ms);
+ getTensorShape(input1()), getTensorShape(input2()), ¶ms);
if (need_broadcast)
{
tflite::reference_ops::BroadcastAdd4DSlow(
- params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
- getTensorShape(input2()), getTensorData<uint8_t>(input2()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
}
else
{
const int32_t shifted_input1_val = static_cast<int32_t>(input1_val) << left_shift;
const int32_t shifted_input2_val = static_cast<int32_t>(input2_val) << left_shift;
const int32_t scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input1_val, input1_multiplier, input1_shift);
+ shifted_input1_val, input1_multiplier, input1_shift);
const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input2_val, input2_multiplier, input2_shift);
+ shifted_input2_val, input2_multiplier, input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
- raw_sum, output_multiplier, output_shift);
+ raw_sum, output_multiplier, output_shift);
const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output));
return static_cast<int16_t>(clamped_output);
};
std::initializer_list<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
std::initializer_list<int32_t> test_shapes[] = {
- {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
std::initializer_list<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
std::initializer_list<int32_t> output_shapes[] = {
- {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
std::vector<std::vector<float>> output_data = {
- {-0.1f, 2.6f, -0.7f, 2.8f, 0.7f, 3.0f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
- 1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f, 1.4f, 3.0f, 0.8f, 3.0f, 2.2f, 3.0f,
- -1.4f, 0.3f, -2.0f, 0.5f, -0.6f, 0.9f, 0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f},
- {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f, -1.3f},
- {-0.1f, 2.5f, 0.0f, 2.6f, -0.7f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
- 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f, 1.7f, 3.0f, 2.2f, 3.0f, 2.1f, 3.0f,
- -1.1f, 0.5f, -0.6f, 1.0f, -0.7f, 0.9f, 1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f},
- {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f, -1.3f}};
+ {-0.1f, 2.6f, -0.7f, 2.8f, 0.7f, 3.0f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f, 1.4f, 3.0f, 0.8f, 3.0f, 2.2f, 3.0f,
+ -1.4f, 0.3f, -2.0f, 0.5f, -0.6f, 0.9f, 0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f},
+ {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f, -1.3f},
+ {-0.1f, 2.5f, 0.0f, 2.6f, -0.7f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f, 1.7f, 3.0f, 2.2f, 3.0f, 2.1f, 3.0f,
+ -1.1f, 0.5f, -0.6f, 1.0f, -0.7f, 0.9f, 1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f},
+ {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f, -1.3f}};
float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
for (int i = 0; i < output_data.size(); i++)
{
Tensor input1_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
+ makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
Tensor input2_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
quant_param.second, test_data);
Tensor output_tensor =
- makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
AddParams params{};
params.activation = Activation::NONE;
Tensor input1_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
quant_param.second, test_data);
Tensor input2_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
+ makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
Tensor output_tensor =
- makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
AddParams params{};
params.activation = Activation::NONE;
Shape base_shape = {2, 3, 1, 2};
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
std::vector<std::vector<float>> test_outputs = {
- {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
- 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
- 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
- {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
- {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
- 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
- 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
- {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
+ {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
+ 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
+ {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
+ {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
+ 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
+ {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
- << "With shape number " << i;
+ << "With shape number " << i;
}
// Re-run with exchanged inputs.
for (size_t i = 0; i < test_shapes.size(); ++i)
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
- << "With shape number " << i;
+ << "With shape number " << i;
}
}
Shape base_shape = {2, 3, 1, 2};
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
std::vector<std::vector<int32_t>> ref_output_shapes{
- {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
std::vector<std::vector<float>> ref_outputs = {
- {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
- 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
- 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
- {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
- {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
- 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
- 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
- {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
+ {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
+ 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
+ {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
+ {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
+ 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
+ {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
for (size_t i = 0; i < test_shapes.size(); ++i)
{
Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data);
Tensor input2_tensor =
- makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data);
+ makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
const float tolerance = output_tensor.scale();
EXPECT_THAT(extractTensorShape(output_tensor),
::testing::ElementsAreArray(ref_output_shapes[i]))
- << "With shape number " << i;
+ << "With shape number " << i;
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
- << "With shape number " << i;
+ << "With shape number " << i;
}
// Re-run with exchanged inputs and different scales.
for (size_t i = 0; i < test_shapes.size(); ++i)
{
Tensor input1_tensor =
- makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data);
+ makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data);
Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 5.0 / 32767, 0);
const float tolerance = output_tensor.scale();
EXPECT_THAT(extractTensorShape(output_tensor),
::testing::ElementsAreArray(ref_output_shapes[i]))
- << "With shape number " << i;
+ << "With shape number " << i;
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
- << "With shape number " << i;
+ << "With shape number " << i;
}
}
{
ArgMax::ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams ¶ms)
- : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params)
+ : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params)
{
}
void ArgMax::execute() const
{
-#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \
- tflite::optimized_ops::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \
- getTensorData<axis_type>(axis()), getTensorShape(output()), \
- getTensorData<output_type>(output()), \
- std::greater<data_type>())
+#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \
+ tflite::optimized_ops::ArgMinMax( \
+ getTensorShape(input()), getTensorData<data_type>(input()), getTensorData<axis_type>(axis()), \
+ getTensorShape(output()), getTensorData<output_type>(output()), std::greater<data_type>())
if (axis()->element_type() == DataType::S32)
{
switch (_params.output_type)
/*output_shape=*/{1, 1, 1},
/*input_data=*/
{
- 1, 9, 7, 3,
+ 1, 9, 7, 3, //
},
/*dimension_data=*/{3}, /*output_data=*/{1});
Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{},
/*output_shape=*/{1, 1, 1},
/*input_data=*/
{
- 1, 9, 7, 3,
+ 1, 9, 7, 3, //
},
/*dimension_data=*/{3}, /*output_data=*/{1});
}
/*output_shape=*/{1, 1, 2},
/*input_data=*/
{
- 1, 2, 7, 8, 1, 9, 7, 3,
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
},
/*dimension_data=*/{3}, /*output_data=*/{3, 1});
Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{},
/*output_shape=*/{1, 1, 2},
/*input_data=*/
{
- 1, 2, 7, 8, 1, 9, 7, 3,
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
},
/*dimension_data=*/{3}, /*output_data=*/{3, 1});
}
TEST(ArgMaxTest, UnsupportedType_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, {
- 1, 2, 7, 8, 1, 9, 7, 3,
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
});
Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3});
Tensor output_tensor = makeOutputTensor(DataType::U8);
{
AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams ¶ms)
- : KernelWithParams<Pool2DParams>({input}, {output}, params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
{
}
const int32_t input_width = input_shape.dim(2);
const int32_t depth = input_shape.dim(3);
- const int32_t output_height = computeOutputSize(_params.padding, input_height,
- _params.filter_height, _params.stride_height);
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
const int32_t output_width =
- computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+ computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
_padding_height =
- computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+ computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
_padding_width =
- computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+ computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
if (input()->element_type() == DataType::U8)
{
LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
params.quantized_activation_max = activation_max;
tflite::reference_integer_ops::AveragePool(
- params, getTensorShape(input()), getTensorData<int16_t>(input()), //
- getTensorShape(output()), getTensorData<int16_t>(output()));
+ params, getTensorShape(input()), getTensorData<int16_t>(input()), //
+ getTensorShape(output()), getTensorData<int16_t>(output()));
}
} // namespace kernels
{
Shape input_shape{1, 3, 5, 1};
std::vector<float> input_data{
- -4, -3, -2, -1, 0, //
- 1, 2, 3, 4, 5, //
- 6, 7, 8, 9, 10, //
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
kernel.execute();
std::vector<float> ref_output_data{
- 0, 1.5, //
- 4.5, 6, //
+ 0, 1.5, //
+ 4.5, 6, //
};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
TEST(AveragePool2DTest, Uint8_0)
{
std::vector<float> input_data{
- 0, -6, 12, 4, //
- -3, -2, 10, 7, //
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first,
- quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Pool2DParams params{};
TEST(AveragePool2DTest, Uint8_1)
{
std::vector<float> input_data{
- 0, 6, 12, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 12, 4, //
+ 3, 2, 10, 7, //
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first,
- quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Pool2DParams params{};
Shape input_shape{1, 3, 5, 1};
std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
std::vector<float> input_data{
- -4, -3, -2, -1, 0, //
- 1, 2, 3, 4, 5, //
- 6, 7, 8, 9, 10, //
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
};
std::vector<float> ref_output_data{
- 0, 1.5, //
- 4.5, 6, //
+ 0, 1.5, //
+ 4.5, 6, //
};
Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
{
Shape input_shape{1, 3, 5};
std::vector<float> input_data{
- -4, -3, -2, -1, 0, //
- 1, 2, 3, 4, 5, //
- 6, 7, 8, 9, 10, //
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 3, 5, 1};
std::vector<float> input_data{
- -4, -3, -2, -1, 0, //
- 1, 2, 3, 4, 5, //
- 6, 7, 8, 9, 10, //
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8);
TEST(AveragePool2DTest, Quant_Param_NEG)
{
std::vector<float> input_data{
- 0, -6, 12, 4, //
- -3, -2, 10, 7, //
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
};
std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+namespace
+{
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+} // namespace
+
+BatchToSpaceND::BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output)
+ : Kernel({input, block_shape, crops}, {output})
+{
+}
+
+void BatchToSpaceND::configure()
+{
+ const auto *block_shape_data = block_shape()->data<int32_t>();
+ const auto *crops_data = crops()->data<int32_t>();
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ int spatial_dims_num = input()->shape().num_dims() - 2;
+
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+ LUCI_INTERPRETER_CHECK(crops()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(crops()->shape().dim(0) == spatial_dims_num);
+ LUCI_INTERPRETER_CHECK(crops()->shape().dim(1) == 2);
+ for (int i = 0; i < spatial_dims_num * 2; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(crops_data[i] >= 0);
+ }
+
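+  // Output shape per BatchToSpaceND semantics: each spatial dim is multiplied by its
+  // block factor and reduced by its crops, while the batch dim is divided by the
+  // product of the block factors (e.g. input {4, 2, 2, 1} with block_shape {2, 2}
+  // and zero crops yields an output of {1, 4, 4, 1}).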
+ Shape output_shape = Shape(input()->shape().num_dims());
+ int output_batch_size = input()->shape().dim(0);
+ for (int i = 0; i < spatial_dims_num; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(output_batch_size % block_shape_data[i] == 0);
+ output_batch_size = output_batch_size / block_shape_data[i];
+ output_shape.dim(i + 1) =
+ input()->shape().dim(i + 1) * block_shape_data[i] - crops_data[i * 2] - crops_data[i * 2 + 1];
+ }
+
+ output_shape.dim(0) = output_batch_size;
+ output_shape.dim(input()->shape().num_dims() - 1) =
+ input()->shape().dim(input()->shape().num_dims() - 1);
+ output()->resize(output_shape);
+}
+
+void BatchToSpaceND::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::optimized_ops::BatchToSpaceND(
+ getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()),
+ getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+ getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::optimized_ops::BatchToSpaceND(
+ getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()),
+ getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+ getTensorData<int32_t>(crops()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class BatchToSpaceND : public Kernel
+{
+public:
+ BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *block_shape() const { return _inputs[1]; }
+ const Tensor *crops() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> crops_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> crops_data, std::initializer_list<T> output_data)
+{
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data);
+ Tensor crops_tensor = makeInputTensor<DataType::S32>(crops_shape, crops_data);
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T> class BatchToSpaceNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(BatchToSpaceNDTest, DataTypes);
+
+TYPED_TEST(BatchToSpaceNDTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{4, 2, 2, 1}, /*block_shape_shape=*/{2}, /*crops_shape=*/{2, 2},
+ /*output_shape=*/{1, 4, 4, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*block_shape_data=*/{2, 2}, /*crops_data=*/{0, 0, 0, 0},
+ /*output_data=*/{1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16});
+}
+
+TEST(BatchToSpaceNDTest, Invalid_Shape_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2});
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0});
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(BatchToSpaceNDTest, Invalid_Crops_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2});
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0});
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
if (unextended_input1_shape == unextended_input2_shape)
{
const int flat_size = tflite::MatchingElementsSize(
- unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
+ unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
for (int i = 0; i < flat_size; ++i)
{
output_data[i] = op(input1_data[i], input2_data[i]);
auto fn = [&](int indexes[N]) {
output_data[SubscriptToIndex(output_desc, indexes)] =
- op(input1_data[SubscriptToIndex(desc1, indexes)],
- input2_data[SubscriptToIndex(desc2, indexes)]);
+ op(input1_data[SubscriptToIndex(desc1, indexes)],
+ input2_data[SubscriptToIndex(desc2, indexes)]);
};
tflite::NDOpsHelper<N>(output_desc, fn);
}
find_package(Threads REQUIRED)
-nnas_find_package(GTest REQUIRED)
set(SOURCES
Add.h
ArgMax.cpp
AveragePool2D.h
AveragePool2D.cpp
+ BatchToSpaceND.h
+ BatchToSpaceND.cpp
Concatenation.h
Concatenation.cpp
Conv2D.h
Minimum.cpp
Mul.h
Mul.cpp
+ Neg.h
+ Neg.cpp
NotEqual.h
NotEqual.cpp
+ Pack.h
+ Pack.cpp
Pad.h
Pad.cpp
Pow.h
Slice.cpp
Softmax.h
Softmax.cpp
+ SpaceToBatchND.h
+ SpaceToBatchND.cpp
SpaceToDepth.h
SpaceToDepth.cpp
Split.h
StridedSlice.cpp
Sqrt.h
Sqrt.cpp
+ SquaredDifference.h
+ SquaredDifference.cpp
Squeeze.h
Squeeze.cpp
Sub.h
PUBLIC luci_interpreter_core
PRIVATE nncc_common Threads::Threads)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
set(TEST_SOURCES
Add.test.cpp
ArgMax.test.cpp
AveragePool2D.test.cpp
+ BatchToSpaceND.test.cpp
Concatenation.test.cpp
Conv2D.test.cpp
DepthToSpace.test.cpp
Mean.test.cpp
Minimum.test.cpp
Mul.test.cpp
+ Neg.test.cpp
NotEqual.test.cpp
+ Pack.test.cpp
Pad.test.cpp
Pow.test.cpp
Prelu.test.cpp
Rsqrt.test.cpp
Slice.test.cpp
Softmax.test.cpp
+ SpaceToBatchND.test.cpp
SpaceToDepth.test.cpp
Split.test.cpp
StridedSlice.test.cpp
Sqrt.test.cpp
+ SquaredDifference.test.cpp
Squeeze.test.cpp
Sub.test.cpp
Tanh.test.cpp
Concatenation::Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
const ConcatenationParams ¶ms)
- : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params)
+ : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params)
{
}
LUCI_INTERPRETER_CHECK(num_inputs > 0);
const Tensor *t0 = _inputs[0];
+ // TODO: Support concat with fused activation function
+ LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::NONE);
+
int axis = _params.axis;
if (axis < 0)
axis += t0->shape().num_dims();
// Try different 'axis' and expect different results.
{
params.axis = 0;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
}
{
params.axis = -2; // Same as '0'.
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
}
{
params.axis = 1;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
}
{
params.axis = -1; // Same as '1'.
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
ConcatenationParams params{};
params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({}, &output_tensor, params);
EXPECT_ANY_THROW(kernel.configure());
ConcatenationParams params{};
params.axis = -3;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
EXPECT_ANY_THROW(kernel.configure());
ConcatenationParams params{};
params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
EXPECT_ANY_THROW(kernel.configure());
ConcatenationParams params{};
params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
EXPECT_ANY_THROW(kernel.configure());
ConcatenationParams params{};
params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
EXPECT_ANY_THROW(kernel.configure());
ConcatenationParams params{};
params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// TODO: Remove this test when concat w/ fused_activation is supported
+TEST(ConcatenationTest, With_Fused_Activation_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = 1;
+ params.activation = luci::FusedActFunc::RELU;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
EXPECT_ANY_THROW(kernel.configure());
Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
const Conv2DParams ¶ms)
- : KernelWithParams<Conv2DParams>({input, filter, bias}, {output}, params)
+ : KernelWithParams<Conv2DParams>({input, filter, bias}, {output}, params)
{
}
bias()->shape().dim(0) == output_depth));
const int32_t output_height =
- computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
- _params.dilation_height_factor);
+ computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+ _params.dilation_height_factor);
const int32_t output_width =
- computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
- _params.dilation_width_factor);
+ computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+ _params.dilation_width_factor);
_padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
input_height, filter_height, output_height);
// Allocate tensor for Im2Col, if needed.
// The checks here should be aligned with the actual implementation.
const bool need_dilated_im2col =
- _params.dilation_height_factor != 1 || _params.dilation_width_factor != 1;
+ _params.dilation_height_factor != 1 || _params.dilation_width_factor != 1;
const bool need_non_dilated_im2col = _params.stride_height != 1 || _params.stride_width != 1 ||
filter_height != 1 || filter_width != 1;
const bool need_im2col =
- input()->element_type() != DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col);
+ input()->element_type() != DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col);
if (need_im2col)
{
const int input_depth = input_shape.dim(3);
try
{
_im2col =
- std::make_unique<Tensor>(input()->element_type(), im2col_shape, AffineQuantization{}, "");
+ std::make_unique<Tensor>(input()->element_type(), im2col_shape, AffineQuantization{}, "");
}
catch (std::bad_alloc &ba)
{
params.float_activation_max = activation_max;
if (_im2col)
- tflite::optimized_ops::Conv(params, getTensorShape(input()), getTensorData<float>(input()),
- getTensorShape(filter()), getTensorData<float>(filter()),
- getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()),
- getTensorShape(_im2col.get()), getTensorData<float>(_im2col.get()));
- else
- tflite::reference_ops::Conv(
+ {
+ try
+ {
+ tflite::optimized_ops::Conv(
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()), getTensorShape(_im2col.get()),
+ getTensorData<float>(_im2col.get()));
+ }
+ catch (std::bad_alloc &ba)
+ {
+        // Optimized Conv could not allocate memory: release the im2col buffer and
+        // fall back to the reference implementation, which does not need it.
+ _im2col->deallocate();
+
+ tflite::reference_ops::Conv(
params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(), nullptr);
+ }
+ }
+ else
+ tflite::reference_ops::Conv(
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(), nullptr);
}
void Conv2D::evalQuantized() const
gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));
tflite::optimized_ops::Conv(
- params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
- getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
- getTensorShape(output()), getTensorData<uint8_t>(output()), getTensorShape(_im2col.get()),
- getTensorData<uint8_t>(_im2col.get()), gemmlowp_context.get());
+ params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
+ getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()), getTensorShape(_im2col.get()),
+ getTensorData<uint8_t>(_im2col.get()), gemmlowp_context.get());
}
void Conv2D::evalQuantizedPerChannel() const
calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
const std::vector<double> effective_output_scale =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
const std::vector<ChannelQuantMultipliers> multipliers_raw =
- quantizeMultipliers(effective_output_scale);
+ quantizeMultipliers(effective_output_scale);
BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(multipliers_raw);
for (int32_t batch = 0; batch < batches; ++batch)
for (int32_t in_c = 0; in_c < input_depth; ++in_c)
{
const uint8_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
const uint8_t filter_val =
- filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
acc += static_cast<int32_t>(input_val - input()->zero_point()) *
static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
}
}
int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
- acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
+ acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
scaled_acc += output()->zero_point();
scaled_acc = std::max(scaled_acc, activation_min);
calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
const std::vector<double> effective_output_scale =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
const std::vector<ChannelQuantMultipliers> multipliers_raw =
- quantizeMultipliers(effective_output_scale);
+ quantizeMultipliers(effective_output_scale);
BroadcastableWrapper<ChannelQuantMultipliers> multipliers(multipliers_raw);
for (int32_t batch = 0; batch < batches; ++batch)
for (int32_t in_c = 0; in_c < input_depth; ++in_c)
{
const int16_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
const int16_t filter_val =
- filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
}
}
}
int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
- acc, multipliers[out_c].multiplier, multipliers[out_c].shift);
+ acc, multipliers[out_c].multiplier, multipliers[out_c].shift);
scaled_acc = std::max(scaled_acc, activation_min);
scaled_acc = std::min(scaled_acc, activation_max);
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{2};
std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
kernel.execute();
std::vector<float> ref_output_data{
- 11, 16, 7, 20, // row = 0
- 0, 40, 0, 44, // row = 1
+ 11, 16, 7, 20, // row = 0
+ 0, 40, 0, 44, // row = 1
};
std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
Shape filter_shape{3, 2, 2, 1};
Shape bias_shape{3};
std::vector<float> input_data{
- // First batch
- 1, 1, 1, 1, // row = 1
- 2, 2, 2, 2, // row = 2
- // Second batch
- 1, 2, 3, 4, // row = 1
- 1, 2, 3, 4, // row = 2
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
};
std::vector<float> filter_data{
- 1, 2, 3, 4, // first 2x2 filter
- -1, 1, -1, 1, // second 2x2 filter
- -1, -1, 1, 1, // third 2x2 filter
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
};
std::vector<float> bias_data{1, 2, 3};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
kernel.execute();
std::vector<float> ref_output_data{
- 18, 2, 5, // first batch, left
- 18, 2, 5, // first batch, right
- 17, 4, 3, // second batch, left
- 37, 4, 3, // second batch, right
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
};
std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
TEST(Conv2DTest, Uint8)
{
std::vector<float> input_data{
- // First batch
- 1, 1, 1, 1, // row = 1
- 2, 2, 2, 2, // row = 2
- // Second batch
- 1, 2, 3, 4, // row = 1
- 1, 2, 3, 4, // row = 2
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
};
std::vector<float> filter_data{
- 1, 2, 3, 4, // first 2x2 filter
- -1, 1, -1, 1, // second 2x2 filter
- -1, -1, 1, 1, // third 2x2 filter
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
};
std::vector<float> bias_data{1, 2, 3};
Tensor filter_tensor = makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first,
input_quant_param.second, filter_data);
Tensor bias_tensor = makeInputTensor<DataType::S32>(
- {3}, input_quant_param.first * input_quant_param.first, 0, bias_data);
+ {3}, input_quant_param.first * input_quant_param.first, 0, bias_data);
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
Conv2DParams params{};
params.padding = Padding::VALID;
kernel.execute();
std::vector<float> ref_output_data{
- 18, 2, 5, // first batch, left
- 18, 2, 5, // first batch, right
- 17, 4, 3, // second batch, left
- 37, 4, 3, // second batch, right
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
};
std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
{
const int output_channels = 3;
std::vector<float> input_data{
- // First batch
- 1, 1, 1, 1, // row = 1
- 2, 2, 2, 2, // row = 2
- // Second batch
- 1, 2, 3, 4, // row = 1
- 1, 2, 3, 4, // row = 2
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
};
std::vector<float> filter_data{
- 1, 2, 3, 4, // first 2x2 filter
- -1, 1, -1, 1, // second 2x2 filter
- -1, -1, 1, 1, // third 2x2 filter
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
};
std::vector<float> bias_data{1, 2, 3};
Shape filter_shape{output_channels, 2, 2, 1};
Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first,
input_quant_param.second, input_data);
Tensor filter_tensor =
- makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 0, filter_data);
+ makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 0, filter_data);
Tensor bias_tensor =
- makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data);
+ makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data);
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
Conv2DParams params{};
params.padding = Padding::VALID;
kernel.execute();
std::vector<float> ref_output_data{
- 18, 2, 5, // first batch, left
- 18, 2, 5, // first batch, right
- 17, 4, 3, // second batch, left
- 37, 4, 3, // second batch, right
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
};
std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2};
std::vector<float> ref_output_data{
- 11, 16, 7, 20, // row = 0
- 0, 40, 0, 44, // row = 1
+ 11, 16, 7, 20, // row = 0
+ 0, 40, 0, 44, // row = 1
};
Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data);
std::vector<int32_t> ref_output_shape{1, 2, 2, 3};
std::vector<float> input_data{
- 1, 2, // row = 0, col 0
- 3, 4, // row = 0, col 1
- 5, 6, // row = 1, col 0
- 7, 8, // row = 1, col 1
+ 1, 2, // row = 0, col 0
+ 3, 4, // row = 0, col 1
+ 5, 6, // row = 1, col 0
+ 7, 8, // row = 1, col 1
};
std::vector<float> filter_data{
- 4, -3, // out = 0
- 1, -3, // out = 1
- 5, -3, // out = 2
+ 4, -3, // out = 0
+ 1, -3, // out = 1
+ 5, -3, // out = 2
};
std::vector<float> bias_data{1, 10, 5};
std::vector<float> ref_output_data{
- 0, 5, 4, // row 0, col 0
- 1, 1, 8, // row 0, col 1
- 3, 0, 12, // row 1, col 0
- 5, 0, 16, // row 1, col 1
+ 0, 5, 4, // row 0, col 0
+ 1, 1, 8, // row 0, col 1
+ 3, 0, 12, // row 1, col 0
+ 5, 0, 16, // row 1, col 1
};
float input_scale = 0.25f;
Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data);
Tensor filter_tensor =
- makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, filter_data);
+ makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, filter_data);
Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{2};
std::vector<int32_t> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2};
Tensor input_tensor = makeInputTensor<DataType::S32>(input_shape, input_data);
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{2};
std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<uint8_t> bias_data{1, 2};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{3};
std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2, 3};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{2};
std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
{
DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params)
- : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
+ : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
{
}
DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
Tensor *output, const DepthwiseConv2DParams &params)
- : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output}, params)
+ : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output}, params)
{
}
bias()->shape().dim(0) == channels_out));
const int32_t output_height =
- computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
- _params.dilation_height_factor);
+ computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+ _params.dilation_height_factor);
const int32_t output_width =
- computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
- _params.dilation_width_factor);
+ computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+ _params.dilation_width_factor);
_padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
input_height, filter_height, output_height);
params.float_activation_max = activation_max;
tflite::reference_ops::DepthwiseConv(
- params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
- getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()));
}
void DepthwiseConv2D::evalQuantizedPerChannel() const
calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
const std::vector<double> effective_output_scales =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
- quantizeMultipliers(effective_output_scales);
+ quantizeMultipliers(effective_output_scales);
BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
for (int batch = 0; batch < batches; ++batch)
const int in_y = in_y_origin + dilation_height_factor * filter_y;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
- (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
if (is_point_inside_image)
{
int32 input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
int32 filter_val =
- filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
+ filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
acc += (filter_val - filter()->zero_points()[output_channel]) *
(input_val - input()->zero_point());
}
int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
int output_shift = quant_multipliers[output_channel].shift;
int32_t scaled_acc =
- tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
scaled_acc += output()->zero_point();
scaled_acc = std::max(scaled_acc, activation_min);
scaled_acc = std::min(scaled_acc, activation_max);
output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] =
- static_cast<uint8_t>(scaled_acc);
+ static_cast<uint8_t>(scaled_acc);
}
}
}
params.quantized_activation_max = activation_max;
tflite::reference_ops::DepthwiseConv(
- params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
- getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
- getTensorShape(output()), getTensorData<uint8_t>(output()));
+ params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
+ getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
}
void DepthwiseConv2D::evalQuantizedS16() const
const int32_t depth_multiplier = _params.depth_multiplier;
const std::vector<double> effective_output_scales =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
- quantizeMultipliers(effective_output_scales);
+ quantizeMultipliers(effective_output_scales);
BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
{
const int16_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
const int16_t filter_val =
- filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
+ filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
}
}
int32_t output_multiplier = quant_multipliers[out_c].multiplier;
int output_shift = quant_multipliers[out_c].shift;
int32_t scaled_acc =
- tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
scaled_acc = std::max(scaled_acc, activation_min);
scaled_acc = std::min(scaled_acc, activation_max);
Shape filter_shape{1, 2, 2, 4};
Shape bias_shape{4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
kernel.execute();
std::vector<float> ref_output_data{
- 71, 0, 99, 0, //
- 167, 0, 227, 28, //
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
TEST(DepthwiseConv2DTest, Uint8)
{
std::vector<float> input_data{
- 1, 2, 7, 8, // column 1
- 3, 4, 9, 10, // column 2
- 5, 6, 11, 12, // column 3
+ 1, 2, 7, 8, // column 1
+ 3, 4, 9, 10, // column 2
+ 5, 6, 11, 12, // column 3
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
Tensor filter_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first,
input_quant_param.second, filter_data);
Tensor bias_tensor = makeInputTensor<DataType::S32>(
- {4}, input_quant_param.first * input_quant_param.first, 0, bias_data);
+ {4}, input_quant_param.first * input_quant_param.first, 0, bias_data);
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
kernel.execute();
std::vector<float> ref_output_data{
- 71, -34, 99, -20, //
- 91, -26, 127, -4, //
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
};
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
std::vector<int32_t> ref_output_shape{1, 2, 1, 4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
std::vector<float> ref_output_data{
- 71, 0, 99, 0, //
- 167, 0, 227, 28, //
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
};
Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data);
std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
std::vector<float> ref_output_data{
- 71, 0, 99, 0, //
- 167, 0, 227, 28, //
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
};
float input_scale = 0.25;
std::vector<int32_t> zerop(4, 0);
Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data);
Tensor filter_tensor =
- makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3, filter_data);
+ makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3, filter_data);
Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
std::vector<float> ref_output_data{
- 71, -34, 99, -20, //
- 91, -26, 127, -4, //
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
};
std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 16);
Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
input_quant_param.second, input_data);
Tensor filter_tensor =
- makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 3, filter_data);
+ makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 3, filter_data);
Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data);
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
Shape filter_shape{1, 2, 2, 4};
Shape bias_shape{4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<int32_t> bias_data{1, 2, 3, 4};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Shape filter_shape{1, 2, 2, 4};
Shape bias_shape{4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Shape filter_shape{2, 2, 4};
Shape bias_shape{4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Shape filter_shape{2, 1, 2, 4};
Shape bias_shape{4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Shape filter_shape{1, 2, 4, 2};
Shape bias_shape{4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
{
Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params)
- : KernelWithParams<DivParams>({input1, input2}, {output}, params)
+ : KernelWithParams<DivParams>({input1, input2}, {output}, params)
{
}
params.float_activation_min = activation_min;
params.float_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), &params);
+ getTensorShape(input1()), getTensorShape(input2()), &params);
if (need_broadcast)
{
tflite::reference_ops::BroadcastDivSlow(
- params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
- getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
}
else
{
params.quantized_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), &params);
+ getTensorShape(input1()), getTensorShape(input2()), &params);
if (need_broadcast)
{
tflite::reference_ops::BroadcastDivSlow(
- params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
- getTensorShape(input2()), getTensorData<uint8_t>(input2()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
}
else
{
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.f, 1.f);
Tensor input1_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input1_data);
+ makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input1_data);
Tensor input2_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input2_data);
+ makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input2_data);
Tensor output_tensor =
- makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
DivParams params{};
params.activation = Activation::RELU;
TEST(EluTest, SimpleElu)
{
Check(
- /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
- /*input_data=*/
- {
- 0, -6, 2, -4, //
- 3, -2, 10, -0.1, //
- },
- /*output_data=*/
- {
- 0.0, -0.997521, 2.0, -0.981684, //
- 3.0, -0.864665, 10.0, -0.0951626, //
- });
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
+ },
+ /*output_data=*/
+ {
+ 0.0, -0.997521, 2.0, -0.981684, //
+ 3.0, -0.864665, 10.0, -0.0951626, //
+ });
}
TEST(EluTest, InOutTypeMismatch_NEG)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, -6, 2, -4, //
- 3, -2, 10, -0.1, //
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8);
TEST(EqualTest, FloatSimple)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
- -1, 0, 1, // Row 2
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, true, false, // Row 1
- false, true, false, // Row 2
+ false, true, false, // Row 1
+ false, true, false, // Row 2
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
TEST(EqualTest, FloatBroardcast)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
- -1, 0, 1, // Row 3
- 0.9, 0.7, 0.5, // Row 4
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ 0.9, 0.7, 0.5, // Row 4
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
+ 0.9, 0.7, 0.5, // Row 1
};
std::vector<bool> ref_output_data{
- false, true, false, // Row 1
- false, false, false, // Row 2
- false, false, false, // Row 3
- true, true, true, // Row 4
+ false, true, false, // Row 1
+ false, false, false, // Row 2
+ false, false, false, // Row 3
+ true, true, true, // Row 4
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data);
TEST(EqualTest, Uint8Quantized)
{
std::vector<float> x_data{
- 0.5, 0.5, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.5, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.5, 0.55, 0.5, // Row 1
- -1, 0, 0.05, 1, // Row 2
+ 0.9, 0.5, 0.55, 0.5, // Row 1
+ -1, 0, 0.05, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, true, false, false, // Row 1
- false, true, true, false, // Row 2
+ false, true, false, false, // Row 1
+ false, true, true, false, // Row 2
};
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first,
- x_quant_param.second, x_data);
+ Tensor x_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2);
- Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first,
- y_quant_param.second, y_data);
+ Tensor y_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
TEST(EqualTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
- 0.4, -0.8, 0.7, 0.3, // Row 1
- -0.5, 0.1, 0, 0.5, // Row 2
- 1, 0, 0.05, -1, // Row 3
- -1, 0.05, 0, 1, // Row 4
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ -1, 0.05, 0, 1, // Row 4
};
std::vector<float> y_data{
- -1, 0.05, 0, 1, // Row 1
+ -1, 0.05, 0, 1, // Row 1
};
std::vector<bool> ref_output_data{
- false, false, false, false, // Row 1
- false, false, true, false, // Row 2
- false, false, false, false, // Row 3
- true, true, true, true, // Row 4
+ false, false, false, false, // Row 1
+ false, false, true, false, // Row 2
+ false, false, false, false, // Row 3
+ true, true, true, true, // Row 4
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Equal kernel(&x_tensor, &y_tensor, &output_tensor);
{
std::initializer_list<int32_t> input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0.2, 8.6, 2.4, 4.3, // Row 1
- 3, 7.1, 10.5, -0.9, // Row 2
+ 0.2, 8.6, 2.4, 4.3, // Row 1
+ 3, 7.1, 10.5, -0.9, // Row 2
};
std::initializer_list<int32_t> ref_output_shape{1, 2, 4, 1};
std::vector<float> ref_output_data{
- 0, 8, 2, 4, // Row 1
- 3, 7, 10, -1, // Row 2
+ 0, 8, 2, 4, // Row 1
+ 3, 7, 10, -1, // Row 2
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
{
FloorDiv::FloorDiv(const Tensor *input, const Tensor *alpha, Tensor *output)
- : Kernel({input, alpha}, {output})
+ : Kernel({input, alpha}, {output})
{
}
if (x()->shape() != y()->shape())
{
tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
- getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
- getTensorData<float>(output()), FloorDivFunc);
+ getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ getTensorData<float>(output()), FloorDivFunc);
}
else
{
tflite::reference_ops::BinaryFunction<float, float, float>(
- getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
- getTensorData<float>(output()), FloorDivFunc);
+ getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ getTensorData<float>(output()), FloorDivFunc);
}
}
{
Shape x_shape{2, 3};
std::vector<float> x_data{
- 0.5, 2.4, 3.1, // Row 1
- 1.9, -1.9, -2.8, // Row 2
+ 0.5, 2.4, 3.1, // Row 1
+ 1.9, -1.9, -2.8, // Row 2
};
Shape y_shape = x_shape;
std::vector<float> y_data{
- 2.0, 0.5, 3.0, // Row 1
- 1.0, -1.0, -2.0, // Row 2
+ 2.0, 0.5, 3.0, // Row 1
+ 1.0, -1.0, -2.0, // Row 2
};
std::vector<int32_t> ref_output_shape{2, 3};
std::vector<float> ref_output_data{
- 0, 4, 1, // Row 1
- 1, 1, 1, // Row 2
+ 0, 4, 1, // Row 1
+ 1, 1, 1, // Row 2
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data);
{
Shape x_shape{1, 3};
std::vector<float> x_data{
- 0.5, 2.4, -3.1, // Row 1
+ 0.5, 2.4, -3.1, // Row 1
};
Shape y_shape{3, 3};
std::vector<float> y_data{
- 1.0, 1.0, 1.0, // Row 1
- 2.0, -0.5, -2.0, // Row 2
- 0.3, 0.7, 0.9, // Row 3
+ 1.0, 1.0, 1.0, // Row 1
+ 2.0, -0.5, -2.0, // Row 2
+ 0.3, 0.7, 0.9, // Row 3
};
std::vector<int32_t> ref_output_shape{3, 3};
std::vector<float> ref_output_data{
- 0, 2, -4, // Row 1
- 0, -5, 1, // Row 2
- 1, 3, -4, // Row 3
+ 0, 2, -4, // Row 1
+ 0, -5, 1, // Row 2
+ 1, 3, -4, // Row 3
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data);
FullyConnected::FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias,
Tensor *output, const FullyConnectedParams &params)
- : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params)
+ : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params)
{
}
params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
tflite::reference_ops::FullyConnected(
- params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()),
- getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()),
+ getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()));
}
void FullyConnected::evalQuantized() const
int32_t output_activation_max;
int32_t output_multiplier;
real_multiplier =
- getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
+ getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
&output_activation_max);
op_params.lhs_cacheable = false;
op_params.rhs_cacheable = false;
tflite::reference_ops::FullyConnected(
- op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(weights()), getTensorData<uint8_t>(weights()), getTensorShape(bias()),
- getTensorData<int32_t>(bias()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(weights()),
+ getTensorData<uint8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
}
} // namespace kernels
template <>
void Check<uint8_t>(
- std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
- std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
- std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
- std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+ std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+ std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+ std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
{
const float quantized_tolerance = getTolerance(-127, 128, 255);
std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
Tensor weights_tensor = makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first,
input_quant_param.second, weights_data);
Tensor bias_tensor = makeInputTensor<DataType::S32>(
- bias_shape, input_quant_param.first * input_quant_param.first, 0, bias_data);
+ bias_shape, input_quant_param.first * input_quant_param.first, 0, bias_data);
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
FullyConnectedParams params{};
params.activation = Activation::RELU;
{
Check<TypeParam>({3, 2, 2, 1}, {3, 6}, {3}, {2, 3},
{
- -3, -5, 5, 4, 9, -2, // batch = 0
- -3, -2, -4, 9, -8, 1, // batch = 1
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
},
{
- -3, -7, 4, -4, -6, 4, // unit = 0
- 3, 5, 2, 3, -3, -8, // unit = 1
- -3, 7, 4, 9, 0, -5, // unit = 2
+ -3, -7, 4, -4, -6, 4, // unit = 0
+ 3, 5, 2, 3, -3, -8, // unit = 1
+ -3, 7, 4, 9, 0, -5, // unit = 2
},
- {-1, -5, -8}, {
- 0, 0, 32, // batch = 0
- 22, 11, 47, // batch = 1
- });
+ {-1, -5, -8},
+ {
+ 0, 0, 32, // batch = 0
+ 22, 11, 47, // batch = 1
+ });
}
TEST(FullyConnectedTest, InvalidBiasType_NEG)
{
Shape input_shape{3, 2, 2, 1};
std::vector<float> input_data{
- -3, -5, 5, 4, 9, -2, // batch = 0
- -3, -2, -4, 9, -8, 1, // batch = 1
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
};
Shape weights_shape{3, 6};
std::vector<float> weights_data{
- -3, -7, 4, -4, -6, 4, // unit = 0
- 3, 5, 2, 3, -3, -8, // unit = 1
- -3, 7, 4, 9, 0, -5, // unit = 2
+ -3, -7, 4, -4, -6, 4, // unit = 0
+ 3, 5, 2, 3, -3, -8, // unit = 1
+ -3, 7, 4, 9, 0, -5, // unit = 2
};
Shape bias_shape{3};
std::vector<int32_t> bias_data{-1, -5, -8};
{
Shape input_shape{3, 2, 2, 1};
std::vector<float> input_data{
- -3, -5, 5, 4, 9, -2, // batch = 0
- -3, -2, -4, 9, -8, 1, // batch = 1
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
};
Shape weights_shape{1, 3, 6};
std::vector<float> weights_data{
- -3, -7, 4, -4, -6, 4, // unit = 0
- 3, 5, 2, 3, -3, -8, // unit = 1
- -3, 7, 4, 9, 0, -5, // unit = 2
+ -3, -7, 4, -4, -6, 4, // unit = 0
+ 3, 5, 2, 3, -3, -8, // unit = 1
+ -3, 7, 4, 9, 0, -5, // unit = 2
};
Shape bias_shape{3};
std::vector<float> bias_data{-1, -5, -8};
{
Shape input_shape{3, 2, 2, 1};
std::vector<float> input_data{
- -3, -5, 5, 4, 9, -2, // batch = 0
- -3, -2, -4, 9, -8, 1, // batch = 1
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
};
Shape weights_shape{6, 3};
std::vector<float> weights_data{
- -3, -7, 4, // unit = 0
- -4, -6, 4, // unit = 1
- 3, 5, 2, // unit = 2
- 3, -3, -8, // unit = 3
- -3, 7, 4, // unit = 4
- 9, 0, -5, // unit = 5
+ -3, -7, 4, // unit = 0
+ -4, -6, 4, // unit = 1
+ 3, 5, 2, // unit = 2
+ 3, -3, -8, // unit = 3
+ -3, 7, 4, // unit = 4
+ 9, 0, -5, // unit = 5
};
Shape bias_shape{3};
std::vector<float> bias_data{-1, -5, -8};
TEST(GreaterTest, FloatSimple)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
- -1, 0, 1, // Row 2
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, false, true, // Row 1
- true, false, false, // Row 2
+ false, false, true, // Row 1
+ true, false, false, // Row 2
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
TEST(GreaterTest, FloatBroardcast)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
- -1, 0, 1, // Row 3
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
+ 0.9, 0.7, 0.5, // Row 1
};
std::vector<bool> ref_output_data{
- false, false, true, // Row 1
- true, false, false, // Row 2
- false, false, true, // Row 3
+ false, false, true, // Row 1
+ true, false, false, // Row 2
+ false, false, true, // Row 3
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
TEST(GreaterTest, Uint8Quantized)
{
std::vector<float> x_data{
- 0.5, 0.6, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.6, 0.6, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, false, true, true, // Row 1
- true, false, true, false, // Row 2
+ false, false, true, true, // Row 1
+ true, false, true, false, // Row 2
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Greater kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(GreaterTest, Uint8QuantizedRescale)
{
std::vector<float> x_data{
- 0.5, 0.6, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.6, 0.6, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, false, true, true, // Row 1
- true, false, true, false, // Row 2
+ false, false, true, true, // Row 1
+ true, false, true, false, // Row 2
};
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 3);
- Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first,
- x_quant_param.second, x_data);
- Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first,
- y_quant_param.second, y_data);
+ Tensor x_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
+ Tensor y_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Greater kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(GreaterTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
- 0.4, -0.8, 0.7, 0.3, // Row 1
- -0.5, 0.1, 0, 0.5, // Row 2
- 1, 0, 0.05, -1, // Row 3
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
};
std::vector<float> y_data{
- -1, 0.05, 0, 1, // Row 1
+ -1, 0.05, 0, 1, // Row 1
};
std::vector<bool> ref_output_data{
- true, false, true, false, // Row 1
- true, true, false, false, // Row 2
- true, false, true, false, // Row 3
+ true, false, true, false, // Row 1
+ true, true, false, false, // Row 2
+ true, false, true, false, // Row 3
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Greater kernel(&x_tensor, &y_tensor, &output_tensor);
{
GreaterEqual::GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output)
- : Kernel({x, y}, {output})
+ : Kernel({x, y}, {output})
{
}
if (op_params.is_broadcast)
{
tflite::reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
- op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data,
- getTensorShape(output()), output_data);
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
}
else
{
TEST(GreaterEqualTest, FloatSimple)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
- -1, 0, 1, // Row 2
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, true, true, // Row 1
- true, true, false, // Row 2
+ false, true, true, // Row 1
+ true, true, false, // Row 2
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
TEST(GreaterEqualTest, FloatBroardcast)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
- -1, 0, 1, // Row 3
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
+ 0.9, 0.7, 0.5, // Row 1
};
std::vector<bool> ref_output_data{
- false, true, true, // Row 1
- true, false, false, // Row 2
- false, false, true, // Row 3
+ false, true, true, // Row 1
+ true, false, false, // Row 2
+ false, false, true, // Row 3
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
TEST(GreaterEqualTest, Uint8Quantized)
{
std::vector<float> x_data{
- 0.5, 0.6, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.6, 0.55, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, true, true, true, // Row 1
- true, false, true, false, // Row 2
+ false, true, true, true, // Row 1
+ true, false, true, false, // Row 2
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(GreaterEqualTest, Uint8QuantizedRescale)
{
std::vector<float> x_data{
- 0.5, 0.5, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.5, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.5, 0.6, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.5, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, true, true, true, // Row 1
- true, false, true, false, // Row 2
+ false, true, true, true, // Row 1
+ true, false, true, false, // Row 2
};
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
- Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first,
- x_quant_param.second, x_data);
- Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first,
- y_quant_param.second, y_data);
+ Tensor x_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
+ Tensor y_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(GreaterEqualTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
- 0.4, -0.8, 0.7, 0.3, // Row 1
- -0.5, 0.1, 0, 0.5, // Row 2
- 1, 0, 0.05, -1, // Row 3
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
};
std::vector<float> y_data{
- -1, 0.05, 0, 1, // Row 1
+ -1, 0.05, 0, 1, // Row 1
};
std::vector<bool> ref_output_data{
- true, false, true, false, // Row 1
- true, true, true, false, // Row 2
- true, false, true, false, // Row 3
+ true, false, true, false, // Row 1
+ true, true, true, false, // Row 2
+ true, false, true, false, // Row 3
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
If::If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs,
RuntimeGraph *then_graph, RuntimeGraph *else_graph)
- : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph),
- _else_graph(else_graph)
+ : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph),
+ _else_graph(else_graph)
{
}
{
RuntimeGraph *graph = module->addGraph();
Tensor *input1 = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
Tensor *input2 = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
Tensor *output = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
graph->setInputTensors({input1, input2});
graph->setOutputTensors({output});
{
RuntimeGraph *graph = module->addGraph();
Tensor *input1 = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
Tensor *input2 = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
Tensor *output = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
graph->setInputTensors({input1, input2});
graph->setOutputTensors({output});
InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta,
Tensor *output, const InstanceNormParams &params)
- : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params)
+ : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params)
{
}
for (int32_t width = 0; width < widths; width++)
{
double input_value =
- input_data[tflite::Offset(output_shape, batch, height, width, channel)];
+ input_data[tflite::Offset(output_shape, batch, height, width, channel)];
double output_value = input_value * a + b;
output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
- tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
- activation_max);
+ tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
+ activation_max);
}
}
}
{
L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params)
- : KernelWithParams<L2NormParams>({input}, {output}, params)
+ : KernelWithParams<L2NormParams>({input}, {output}, params)
{
}
std::initializer_list<float> output_data)
{
std::pair<float, int32_t> quant_param =
- quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
- std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+ quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+ std::max(input_data) > 0 ? std::max(input_data) : 0.f);
Tensor input_tensor =
- makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
+ makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 128., 128);
L2NormParams params{};
{
L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
- : KernelWithParams<Pool2DParams>({input}, {output}, params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
{
}
int out_width, out_height;
out_width = computeOutputSize(padding, width, params().filter_width, params().stride_width, 1);
out_height =
- computeOutputSize(padding, height, params().filter_height, params().stride_height, 1);
+ computeOutputSize(padding, height, params().filter_height, params().stride_height, 1);
_padding_width =
- computePadding(params().stride_width, 1, width, params().filter_width, out_width);
+ computePadding(params().stride_width, 1, width, params().filter_width, out_width);
_padding_height =
- computePadding(params().stride_height, 1, height, params().filter_height, out_height);
+ computePadding(params().stride_height, 1, height, params().filter_height, out_height);
LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
output()->resize({batches, out_height, out_width, channels_out});
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- -1, -6, 2, 4, //
- -3, -2, 10, 7, //
+ -1, -6, 2, 4, //
+ -3, -2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- -0.1, -0.6, 2, 4, //
- -0.3, -0.2, 10, 7, //
+ -0.1, -0.6, 2, 4, //
+ -0.3, -0.2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- -0.1, -0.6, 2, 4, //
- -0.3, -0.2, 10, 7, //
+ -0.1, -0.6, 2, 4, //
+ -0.3, -0.2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8);
{
LeakyRelu::LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params)
- : KernelWithParams<LeakyReluParams>({input}, {output}, params)
+ : KernelWithParams<LeakyReluParams>({input}, {output}, params)
{
}
op_params.output_shift_identity = _output_shift_identity;
tflite::reference_ops::QuantizeLeakyRelu(
- op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
}
} // namespace kernels
const float quantized_tolerance = getTolerance(-8, 127.f / 16.f, 255);
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-8, 127.f / 16.f);
Tensor input_tensor =
- makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
+ makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
LeakyReluParams params{};
Check<TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3},
/*input_data=*/
{
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -1.0f, -2.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
},
/*output_data=*/
{
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -0.5f, -1.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -0.5f, -1.0f, // Row 2
},
/*alpha=*/0.5f);
TEST(LeakReluTest, IvalidInputOutputType_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, {
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -1.0f, -2.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
});
Tensor output_tensor = makeOutputTensor(DataType::U8);
TEST(LessTest, FloatSimple)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
- -1, 0, 1, // Row 2
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, false, false, // Row 1
- false, false, true, // Row 2
+ true, false, false, // Row 1
+ false, false, true, // Row 2
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
TEST(LessTest, FloatBroardcast)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
- -1, 0, 1, // Row 3
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
+ 0.9, 0.7, 0.5, // Row 1
};
std::vector<bool> ref_output_data{
- true, false, false, // Row 1
- false, true, true, // Row 2
- true, true, false, // Row 3
+ true, false, false, // Row 1
+ false, true, true, // Row 2
+ true, true, false, // Row 3
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
TEST(LessTest, Uint8Quantized)
{
std::vector<float> x_data{
- 0.5, 0.6, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.6, 0.55, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, false, false, false, // Row 1
- false, true, false, true, // Row 2
+ true, false, false, false, // Row 1
+ false, true, false, true, // Row 2
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Less kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(LessTest, Uint8QuantizedRescale)
{
std::vector<float> x_data{
- 0.5, 0.6, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.6, 0.6, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, false, false, false, // Row 1
- false, true, false, true, // Row 2
+ true, false, false, false, // Row 1
+ false, true, false, true, // Row 2
};
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
- Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first,
- x_quant_param.second, x_data);
- Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first,
- y_quant_param.second, y_data);
+ Tensor x_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
+ Tensor y_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Less kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(LessTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
- 0.4, -0.8, 0.7, 0.3, // Row 1
- -0.5, 0.1, 0, 0.5, // Row 2
- 1, 0, 0.05, -1, // Row 3
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
};
std::vector<float> y_data{
- -1, 0.05, 0, 1, // Row 1
+ -1, 0.05, 0, 1, // Row 1
};
std::vector<bool> ref_output_data{
- false, true, false, true, // Row 1
- false, false, false, true, // Row 2
- false, true, false, true, // Row 3
+ false, true, false, true, // Row 1
+ false, false, false, true, // Row 2
+ false, true, false, true, // Row 3
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Less kernel(&x_tensor, &y_tensor, &output_tensor);
if (op_params.is_broadcast)
{
tflite::reference_ops::Broadcast4DSlowLessEqualWithScaling(
- op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data,
- getTensorShape(output()), output_data);
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
}
else
{
TEST(LessEqualTest, FloatSimple)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
- -1, 0, 1, // Row 2
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, true, false, // Row 1
- false, true, true, // Row 2
+ true, true, false, // Row 1
+ false, true, true, // Row 2
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
TEST(LessEqualTest, FloatBroardcast)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
- -1, 0, 1, // Row 3
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
+ 0.9, 0.7, 0.5, // Row 1
};
std::vector<bool> ref_output_data{
- true, true, false, // Row 1
- false, true, true, // Row 2
- true, true, false, // Row 3
+ true, true, false, // Row 1
+ false, true, true, // Row 2
+ true, true, false, // Row 3
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
TEST(LessEqualTest, Uint8Quantized)
{
std::vector<float> x_data{
- 0.5, 0.6, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.6, 0.55, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, true, false, false, // Row 1
- false, true, false, true, // Row 2
+ true, true, false, false, // Row 1
+ false, true, false, true, // Row 2
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(LessEqualTest, Uint8QuantizedRescale)
{
std::vector<float> x_data{
- 0.5, 0.6, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.6, 0.6, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, true, false, false, // Row 1
- false, true, false, true, // Row 2
+ true, true, false, false, // Row 1
+ false, true, false, true, // Row 2
};
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
- Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first,
- x_quant_param.second, x_data);
- Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first,
- y_quant_param.second, y_data);
+ Tensor x_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
+ Tensor y_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(LessEqualTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
- 0.4, -0.8, 0.7, 0.3, // Row 1
- -0.5, 0.1, 0, 0.5, // Row 2
- 1, 0, 0.05, -1, // Row 3
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
};
std::vector<float> y_data{
- -1, 0.05, 0, 1, // Row 1
+ -1, 0.05, 0, 1, // Row 1
};
std::vector<bool> ref_output_data{
- false, true, false, true, // Row 1
- false, false, true, true, // Row 2
- false, true, false, true, // Row 3
+ false, true, false, true, // Row 1
+ false, false, true, true, // Row 2
+ false, true, false, true, // Row 3
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
{
LocalResponseNormalization::LocalResponseNormalization(
- const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
- : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
+ const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
+ : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
{
}
op_params.alpha = params().alpha;
op_params.beta = params().beta;
tflite::optimized_ops::LocalResponseNormalization(
- op_params, getTensorShape(input()), getTensorData<float>(input()),
- getTensorShape(output()), getTensorData<float>(output()));
+ op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
break;
default:
throw std::runtime_error("Unsupported type.");
TEST(LocalResponseNormalizationTest, SameAsL2Norm)
{
Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
TEST(LocalResponseNormalizationTest, WithAlpha)
{
Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
TEST(LocalResponseNormalizationTest, WithBias)
{
Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
TEST(LocalResponseNormalizationTest, SmallRadius)
{
Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
TEST(LocalResponseNormalizationTest, InvalidInputDimension_NEG)
{
Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ makeInputTensor<DataType::FLOAT32>({1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
TEST(LocalResponseNormalizationTest, InvalidInputOutputType_NEG)
{
Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
Tensor output_tensor = makeOutputTensor(DataType::U8);
LocalResponseNormalizationParams params{};
{
Shape input_shape{2, 4};
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 10, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
kernel.execute();
std::vector<float> ref_output_data{
- -4.14297, -10.14297, -2.14297, -.142971, //
- -7.00104, -12.00104, -.00104087, -9.00104, //
+ -4.14297, -10.14297, -2.14297, -.142971, //
+ -7.00104, -12.00104, -.00104087, -9.00104, //
};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
}
float kLogSoftmaxQuantizedTolerance = 16. / 256;
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 10, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
};
Tensor input_tensor =
- makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data);
+ makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
LogSoftmax kernel(&input_tensor, &output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
- -4.14297, -10.14297, -2.14297, -.142971, //
- -7.00104, -12.00104, -.00104087, -9.00104, //
+ -4.14297, -10.14297, -2.14297, -.142971, //
+ -7.00104, -12.00104, -.00104087, -9.00104, //
};
std::vector<int32_t> ref_output_shape{2, 4};
EXPECT_THAT(dequantizeTensorData(output_tensor),
TEST(LogSoftmaxTest, InvalidInputOutputType_NEG)
{
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 10, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 4}, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
{
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-10, 10);
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 10, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
};
Tensor input_tensor =
- makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data);
+ makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, 20. / 256, 255);
LogSoftmax kernel(&input_tensor, &output_tensor);
{
LogicalAnd::LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
+ : Kernel({input1, input2}, {output})
{
}
{
LogicalOr::LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
+ : Kernel({input1, input2}, {output})
{
}
std::initializer_list<float> output_data)
{
std::pair<float, int32_t> input_quant_param =
- quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+ quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
input_quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
TYPED_TEST(LogisticTest, Simple)
{
Check<TypeParam>(
- {89}, {89},
- {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636,
- -8.6363636364, -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000,
- -7.2727272727, -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364,
- -5.9090909091, -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727,
- -4.5454545455, -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091,
- -3.1818181818, -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455,
- -1.8181818182, -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818,
- -0.4545454545, -0.2272727273, 0.0000000000, 0.2272727273, 0.4545454545, 0.6818181818,
- 0.9090909091, 1.1363636364, 1.3636363636, 1.5909090909, 1.8181818182, 2.0454545455,
- 2.2727272727, 2.5000000000, 2.7272727273, 2.9545454545, 3.1818181818, 3.4090909091,
- 3.6363636364, 3.8636363636, 4.0909090909, 4.3181818182, 4.5454545455, 4.7727272727,
- 5.0000000000, 5.2272727273, 5.4545454545, 5.6818181818, 5.9090909091, 6.1363636364,
- 6.3636363636, 6.5909090909, 6.8181818182, 7.0454545455, 7.2727272727, 7.5000000000,
- 7.7272727273, 7.9545454545, 8.1818181818, 8.4090909091, 8.6363636364, 8.8636363636,
- 9.0909090909, 9.3181818182, 9.5454545455, 9.7727272727, 10.0000000000},
- {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198,
- 0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786,
- 0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065,
- 0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576,
- 0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562,
- 0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805,
- 0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241,
- 0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759,
- 0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195,
- 0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438,
- 0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424,
- 0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935,
- 0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214,
- 0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802,
- 0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021});
+ {89}, {89},
+ {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636,
+ -8.6363636364, -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000,
+ -7.2727272727, -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364,
+ -5.9090909091, -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727,
+ -4.5454545455, -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091,
+ -3.1818181818, -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455,
+ -1.8181818182, -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818,
+ -0.4545454545, -0.2272727273, 0.0000000000, 0.2272727273, 0.4545454545, 0.6818181818,
+ 0.9090909091, 1.1363636364, 1.3636363636, 1.5909090909, 1.8181818182, 2.0454545455,
+ 2.2727272727, 2.5000000000, 2.7272727273, 2.9545454545, 3.1818181818, 3.4090909091,
+ 3.6363636364, 3.8636363636, 4.0909090909, 4.3181818182, 4.5454545455, 4.7727272727,
+ 5.0000000000, 5.2272727273, 5.4545454545, 5.6818181818, 5.9090909091, 6.1363636364,
+ 6.3636363636, 6.5909090909, 6.8181818182, 7.0454545455, 7.2727272727, 7.5000000000,
+ 7.7272727273, 7.9545454545, 8.1818181818, 8.4090909091, 8.6363636364, 8.8636363636,
+ 9.0909090909, 9.3181818182, 9.5454545455, 9.7727272727, 10.0000000000},
+ {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198,
+ 0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786,
+ 0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065,
+ 0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576,
+ 0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562,
+ 0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805,
+ 0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241,
+ 0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759,
+ 0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195,
+ 0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438,
+ 0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424,
+ 0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935,
+ 0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214,
+ 0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802,
+ 0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021});
}
TEST(LogisticTest, InvalidInputOutputType_NEG)
{
MaxPool2D::MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
- : KernelWithParams<Pool2DParams>({input}, {output}, params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
{
}
const int32_t input_width = input_shape.dim(2);
const int32_t depth = input_shape.dim(3);
- const int32_t output_height = computeOutputSize(_params.padding, input_height,
- _params.filter_height, _params.stride_height);
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
const int32_t output_width =
- computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+ computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
_padding_height =
- computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+ computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
_padding_width =
- computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+ computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
output()->resize({batches, output_height, output_width, depth});
if (input()->element_type() == DataType::U8)
params.quantized_activation_max = activation_max;
tflite::reference_integer_ops::MaxPool(
- params, getTensorShape(input()), getTensorData<int16_t>(input()), //
- getTensorShape(output()), getTensorData<int16_t>(output()));
+ params, getTensorShape(input()), getTensorData<int16_t>(input()), //
+ getTensorShape(output()), getTensorData<int16_t>(output()));
}
} // namespace kernels
{
Shape input_shape{1, 3, 5, 1};
std::vector<float> input_data{
- 1, -1, 0, -2, 2, //
- -7, -6, -5, -4, -3, //
- 5, 4, 3, 6, 7, //
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
kernel.execute();
std::vector<float> ref_output_data{
- 1, 2, //
- 5, 6, //
+ 1, 2, //
+ 5, 6, //
};
std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
{
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375);
std::vector<float> input_data{
- 0, -6, 12, 4, //
- -3, -2, 10, 7, //
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first,
- quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Pool2DParams params{};
Shape input_shape{1, 3, 5, 1};
std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
std::vector<float> input_data{
- 1, -1, 0, -2, 2, //
- -7, -6, -5, -4, -3, //
- 5, 4, 3, 6, 7, //
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
};
std::vector<float> ref_output_data{
- 1, 2, //
- 5, 6, //
+ 1, 2, //
+ 5, 6, //
};
Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data);
{
Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
+ : Kernel({input1, input2}, {output})
{
}
}
Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, const ReducerParams &params)
- : KernelWithParams<ReducerParams>({input, axes}, {output}, params)
+ : KernelWithParams<ReducerParams>({input, axes}, {output}, params)
{
}
tflite::MeanParams params{};
resolveAxes(axes_data, num_axes, &params);
- const bool need_temporaries =
- !(_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
- ((params.axis[0] == 1 && params.axis[1] == 2) ||
- (params.axis[0] == 2 && params.axis[1] == 1)));
+ const bool need_temporaries = !(
+ _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
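+  // In effect, the only reduction that avoids the scratch tensors is a keep_dims
+  // mean of a 4D (e.g. NHWC) tensor over axes {1, 2}; every other combination of
+  // axes goes through the generic reference implementation below and needs the
+  // temporary index / resolved-axes / sum tensors allocated here.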
if (need_temporaries)
{
_temp_index =
- std::make_unique<Tensor>(DataType::S32, Shape(input_num_dims), AffineQuantization{}, "");
+ std::make_unique<Tensor>(DataType::S32, Shape(input_num_dims), AffineQuantization{}, "");
_resolved_axes =
- std::make_unique<Tensor>(DataType::S32, Shape(num_axes), AffineQuantization{}, "");
+ std::make_unique<Tensor>(DataType::S32, Shape(num_axes), AffineQuantization{}, "");
_temp_sum = std::make_unique<Tensor>(input()->element_type(), output()->shape(),
AffineQuantization{}, "");
}
else
{
tflite::reference_ops::Mean(
- getTensorData<float>(input()), getTensorShape(input()).DimsData(),
- input()->shape().num_dims(), getTensorData<float>(output()),
- getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<float>(_temp_sum.get()));
+ getTensorData<float>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<float>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+ _params.keep_dims, getTensorData<int>(_temp_index.get()),
+ getTensorData<int>(_resolved_axes.get()), getTensorData<float>(_temp_sum.get()));
}
}
else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
{
tflite::reference_ops::Mean(
- getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
- input()->shape().num_dims(), getTensorData<uint8_t>(output()),
- getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()));
+ getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<uint8_t>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+ _params.keep_dims, getTensorData<int>(_temp_index.get()),
+ getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()));
}
else
{
tflite::reference_ops::QuantizedMeanOrSum<>(
- getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
- getTensorShape(input()).DimsData(), input()->shape().num_dims(),
- getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
- getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()),
- /*compute_sum=*/false);
+ getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
+ getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+ getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+ _params.keep_dims, getTensorData<int>(_temp_index.get()),
+ getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()),
+ /*compute_sum=*/false);
}
}
assert(output_shape.dim(3) == depth);
const double real_multiplier =
- static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());
+ static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());
int32_t output_multiplier{};
int output_shift{};
}
}
int32_t scaled_acc =
- tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
// Divide by the number of elements rounding to the nearest integer.
scaled_acc = scaled_acc > 0
- ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
- : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;
+ ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
+ : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;
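+        // For example, with num_elements_in_axes = 4 a positive scaled_acc of 7
+        // becomes (7 + 2) / 4 = 2 (1.75 rounds up instead of truncating to 1),
+        // and a negative scaled_acc of -7 becomes (-7 - 2) / 4 = -2, keeping the
+        // rounding symmetric around zero.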
scaled_acc = std::max(scaled_acc, output_min);
scaled_acc = std::min(scaled_acc, output_max);
std::vector<int32_t> axis_data{1};
Tensor input_tensor =
- makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second, input_data);
+ makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second, input_data);
Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
std::vector<int32_t> axis_data{1};
Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 3, 2}, quant_param.first, quant_param.second, input_data);
+ makeInputTensor<DataType::U8>({1, 3, 2}, quant_param.first, quant_param.second, input_data);
Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
{
Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
+ : Kernel({input1, input2}, {output})
{
}
{
Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
- : KernelWithParams<MulParams>({input1, input2}, {output}, params)
+ : KernelWithParams<MulParams>({input1, input2}, {output}, params)
{
}
params.float_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-      getTensorShape(input1()), getTensorShape(input2()), &params);
+    getTensorShape(input1()), getTensorShape(input2()), &params);
if (need_broadcast)
{
tflite::optimized_ops::BroadcastMul4DSlow(
- params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
- getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
}
else
{
Shape base_shape = {2, 3, 1, 2};
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
std::vector<std::vector<float>> test_outputs = {
- {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
- 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
- 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
- {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
- {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
- 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
- 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
- {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
+ {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
+ 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
+ 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
+ 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
+ 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
+ {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
- << "With shape number " << i;
+ << "With shape number " << i;
}
// Re-run with exchanged inputs.
for (size_t i = 0; i < test_shapes.size(); ++i)
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
- << "With shape number " << i;
+ << "With shape number " << i;
}
}
Shape base_shape = {2, 3, 1, 2};
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
std::vector<std::vector<int32_t>> ref_output_shapes{
- {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
std::vector<std::vector<float>> ref_outputs = {
- {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
- 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
- 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
- {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
- {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
- 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
- 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
- {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
+ {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
+ 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
+ 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
+ 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
+ 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
+ {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
for (size_t i = 0; i < test_shapes.size(); ++i)
{
Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data);
Tensor input2_tensor =
- makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data);
+ makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
const float tolerance = output_tensor.scale() * 2;
EXPECT_THAT(extractTensorShape(output_tensor),
::testing::ElementsAreArray(ref_output_shapes[i]))
- << "With shape number " << i;
+ << "With shape number " << i;
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
- << "With shape number " << i;
+ << "With shape number " << i;
}
// Re-run with exchanged inputs and different scales.
for (size_t i = 0; i < test_shapes.size(); ++i)
{
Tensor input1_tensor =
- makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data);
+ makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data);
Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 3.0 / 32767, 0);
const float tolerance = output_tensor.scale() * 2;
EXPECT_THAT(extractTensorShape(output_tensor),
::testing::ElementsAreArray(ref_output_shapes[i]))
- << "With shape number " << i;
+ << "With shape number " << i;
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
- << "With shape number " << i;
+ << "With shape number " << i;
}
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Neg.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Neg::Neg(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Neg::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ output()->resize(input()->shape());
+}
+
+void Neg::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Neg::evalFloat() const
+{
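+  // Element-wise negation: every output element is the negated value of the
+  // corresponding input element, over identically shaped tensors.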
+ tflite::reference_ops::Negate(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_NEG_H
+#define LUCI_INTERPRETER_KERNELS_NEG_H
+
+#include "core/Kernel.h"
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Neg : public Kernel
+{
+public:
+ Neg(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_NEG_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Neg.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<T> output_data)
+{
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ Neg kernel(&input_tensor, &output_tensor);
+
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(NegTest, FloatSimple)
+{
+ Check<float>(/*input_shape=*/{2, 3},
+ /*output_shape=*/{2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, -1.0f, -3.0f, // Row 1
+ -1.0f, 1.0f, 2.0f, // Row 2
+ });
+
+ SUCCEED();
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
if (op_params.is_broadcast)
{
tflite::reference_ops::Broadcast4DSlowNotEqualWithScaling(
- op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data,
- getTensorShape(output()), output_data);
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
}
else
{
TEST(NotEqualTest, FloatSimple)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
- -1, 0, 1, // Row 2
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, false, true, // Row 1
- true, false, true, // Row 2
+ true, false, true, // Row 1
+ true, false, true, // Row 2
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
TEST(NotEqualTest, FloatBroadcast)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
- -1, 0, 1, // Row 3
- 0.9, 0.7, 0.5, // Row 4
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ 0.9, 0.7, 0.5, // Row 4
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
+ 0.9, 0.7, 0.5, // Row 1
};
std::vector<bool> ref_output_data{
- true, false, true, // Row 1
- true, true, true, // Row 2
- true, true, true, // Row 3
- false, false, false, // Row 4
+ true, false, true, // Row 1
+ true, true, true, // Row 2
+ true, true, true, // Row 3
+ false, false, false, // Row 4
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data);
TEST(NotEqualTest, Uint8Quantized)
{
std::vector<float> x_data{
- 0.5, 0.5, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.5, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.5, 0.55, 0.5, // Row 1
- -1, 0, 0.05, 1, // Row 2
+ 0.9, 0.5, 0.55, 0.5, // Row 1
+ -1, 0, 0.05, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, false, true, true, // Row 1
- true, false, false, true, // Row 2
+ true, false, true, true, // Row 1
+ true, false, false, true, // Row 2
};
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first,
- x_quant_param.second, x_data);
+ Tensor x_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2);
- Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first,
- y_quant_param.second, y_data);
+ Tensor y_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
TEST(NotEqualTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
- 0.4, -0.8, 0.7, 0.3, // Row 1
- -0.5, 0.1, 0, 0.5, // Row 2
- 1, 0, 0.05, -1, // Row 3
- -1, 0.05, 0, 1, // Row 4
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ -1, 0.05, 0, 1, // Row 4
};
std::vector<float> y_data{
- -1, 0.05, 0, 1, // Row 1
+ -1, 0.05, 0, 1, // Row 1
};
std::vector<bool> ref_output_data{
- true, true, true, true, // Row 1
- true, true, false, true, // Row 2
- true, true, true, true, // Row 3
- false, false, false, false, // Row 4
+ true, true, true, true, // Row 1
+ true, true, false, true, // Row 2
+ true, true, true, true, // Row 3
+ false, false, false, false, // Row 4
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pack.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Pack::Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params)
+ : KernelWithParams<PackParams>(std::move(inputs), {output}, params)
+{
+}
+
+void Pack::configure()
+{
+ LUCI_INTERPRETER_CHECK(_inputs.size() == static_cast<uint32_t>(params().values_count));
+ const Tensor *t0 = _inputs[0];
+ const int dimension_size = t0->shape().num_dims() + 1;
+ int axis = params().axis;
+ if (axis < 0)
+ {
+ axis += dimension_size;
+ }
+ LUCI_INTERPRETER_CHECK(axis >= 0 && axis <= t0->shape().num_dims());
+
+ if (t0->element_type() != DataType::S32 && t0->element_type() != DataType::FLOAT32 &&
+ t0->element_type() != DataType::U8 && t0->element_type() != DataType::S8 &&
+ t0->element_type() != DataType::S16 && t0->element_type() != DataType::S64)
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ for (uint32_t i = 1; i < _inputs.size(); ++i)
+ {
+ const Tensor *tensor = _inputs[i];
+ LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
+ LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
+ for (int d = 0; d < t0->shape().num_dims(); ++d)
+ {
+ LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
+ }
+ }
+
+ Shape output_shape(dimension_size);
+ int i = 0;
+ for (int index = 0; index < dimension_size; ++index)
+ {
+ if (index == axis)
+ {
+ output_shape.dim(index) = params().values_count;
+ }
+ else
+ {
+ output_shape.dim(index) = t0->shape().dim(i++);
+ }
+ }
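+
+  // For example, packing three tensors of shape {2} with axis = 0 yields an output
+  // shape of {3, 2}, while axis = 1 (or the equivalent axis = -1) yields {2, 3}:
+  // the values_count dimension is inserted at the requested axis and the remaining
+  // input dimensions keep their original order.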
+
+ if (t0->element_type() == DataType::S32 || t0->element_type() == DataType::U8 ||
+ t0->element_type() == DataType::S8 || t0->element_type() == DataType::S16 ||
+ t0->element_type() == DataType::S64)
+ {
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point());
+ LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale());
+    // Guarantee that the quantization params of every input match those of the
+    // output, since packing tensors with differing quantization is not supported.
+ for (int i = 0; i < params().values_count; i++)
+ {
+ LUCI_INTERPRETER_CHECK(_inputs[i]->zero_point() == t0->zero_point());
+ LUCI_INTERPRETER_CHECK(_inputs[i]->scale() == t0->scale());
+ }
+ }
+
+ output()->resize(output_shape);
+}
+
+void Pack::execute() const
+{
+ switch (_inputs[0]->element_type())
+ {
+ case DataType::FLOAT32:
+ evalGeneric<float>();
+ break;
+ case DataType::U8:
+ evalGeneric<uint8_t>();
+ break;
+ case DataType::S8:
+ evalGeneric<int8_t>();
+ break;
+ case DataType::S16:
+ evalGeneric<int16_t>();
+ break;
+ case DataType::S32:
+ evalGeneric<int32_t>();
+ break;
+ case DataType::S64:
+ evalGeneric<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void Pack::evalGeneric() const
+{
+ const Tensor *t0 = _inputs[0];
+ const int dimension_size = t0->shape().num_dims() + 1;
+ int axis = params().axis;
+ if (axis < 0)
+ {
+ axis += dimension_size;
+ }
+
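+  // VectorOfTensors collects the shapes and raw data pointers of all inputs so
+  // they can be handed to the TFLite reference Pack call below in one go.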
+ VectorOfTensors<T, true> inputs(_inputs);
+ tflite::PackParams params{};
+ params.axis = axis;
+ params.inputs_count = _inputs.size();
+ tflite::reference_ops::Pack<T>(params, inputs.shapes(), inputs.data(), getTensorShape(output()),
+ getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PACK_H
+#define LUCI_INTERPRETER_KERNELS_PACK_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Pack : public KernelWithParams<PackParams>
+{
+public:
+  Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params);
+
+ const Tensor *input(int index) const { return _inputs[index]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void evalGeneric() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PACK_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pack.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
+ std::initializer_list<int32_t> output_shape, std::vector<std::vector<T>> input_datas,
+ std::initializer_list<T> output_data, int32_t axis)
+{
+ constexpr DataType element_type = getElementType<T>();
+ std::vector<const Tensor *> inputs(input_datas.size());
+ std::vector<Tensor> tmp_inputs;
+ for (int i = 0; i < input_datas.size(); i++)
+ {
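+    // Float inputs carry no quantization params; for the quantized (uint8_t) type
+    // parameter every input uses a fixed scale of 1/255 and zero point of 128,
+    // matching the output tensor created below, since Pack requires identical
+    // quantization params across all inputs and the output.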
+ if (std::is_same<T, float>::value)
+ {
+ tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, ""));
+ tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
+ }
+ else
+ {
+ tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, ""));
+ tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
+ }
+ }
+ for (int i = 0; i < input_datas.size(); i++)
+ {
+ inputs[i] = &tmp_inputs[i];
+ }
+
+ Tensor output_tensor = makeOutputTensor(element_type);
+ if (!std::is_same<T, float>::value)
+ {
+ output_tensor = makeOutputTensor(element_type, 1.0f / 255, 128);
+ }
+
+ PackParams params{};
+ params.axis = axis;
+ params.values_count = input_datas.size();
+ Pack kernel(inputs, &output_tensor, params);
+
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class PackTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<uint8_t, float>;
+TYPED_TEST_CASE(PackTest, DataTypes);
+
+TYPED_TEST(PackTest, ThreeInputs)
+{
+ Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}},
+ /*output_shape=*/{3, 2},
+ /*input_datas=*/
+ {{1, 4}, {2, 5}, {3, 6}},
+ /*output_data=*/
+ {1, 4, 2, 5, 3, 6}, /*axis=*/0);
+
+ SUCCEED();
+}
+
+TYPED_TEST(PackTest, NegAxis)
+{
+ Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}},
+ /*output_shape=*/{2, 3},
+ /*input_datas=*/
+ {{1, 4}, {2, 5}, {3, 6}},
+ /*output_data=*/
+ {1, 2, 3, 4, 5, 6}, /*axis=*/-1);
+
+ SUCCEED();
+}
+
+TEST(Pack, MismatchingInputValuesCount_NEG)
+{
+ std::vector<float> input1_data{1, 4};
+ std::vector<float> input2_data{2, 5};
+ std::vector<float> input3_data{3, 6};
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data);
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data);
+ Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ PackParams params{};
+ {
+ params.axis = 0;
+ params.values_count = 2;
+
+ Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+ }
+}
+
+TEST(Pack, InvalidInputAxis_NEG)
+{
+ std::vector<float> input1_data{1, 4};
+ std::vector<float> input2_data{2, 5};
+ std::vector<float> input3_data{3, 6};
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data);
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data);
+ Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ PackParams params{};
+ {
+ params.axis = 2;
+ params.values_count = 3;
+
+ Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+ }
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
{
Pad::Pad(const Tensor *input, const Tensor *paddings, Tensor *output)
- : Kernel({input, paddings}, {output})
+ : Kernel({input, paddings}, {output})
{
}
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first,
- quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, input_data);
Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
{
Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
+ : Kernel({input1, input2}, {output})
{
}
tflite::ArithmeticParams params{};
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-      getTensorShape(input1()), getTensorShape(input2()), &params);
+    getTensorShape(input1()), getTensorShape(input2()), &params);
if (need_broadcast)
{
{
Prelu::Prelu(const Tensor *input, const Tensor *alpha, Tensor *output)
- : Kernel({input, alpha}, {output})
+ : Kernel({input, alpha}, {output})
{
}
+Prelu::~Prelu()
+{
+  // Out-of-line destructor: ChannelQuantMultipliers is only forward-declared in the
+  // header, so the vector of alpha multipliers must be destroyed here, where the
+  // type is complete.
+}
+
void Prelu::configure()
{
LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->scales().size() <= 1);
+ LUCI_INTERPRETER_CHECK(output()->scales().size() <= 1);
- if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
+ if (input()->element_type() == DataType::U8)
{
- if (input()->element_type() == DataType::S16)
+ LUCI_INTERPRETER_CHECK(alpha()->scales().size() <= 1); // remove when CWQ kernel arrives
+ _alpha_multipliers.resize(1);
+ double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale();
+ quantizeMultiplier(alpha_multiplier, &_alpha_multipliers[0].multiplier,
+ &_alpha_multipliers[0].shift);
+ double identity_multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+ }
+ else if (input()->element_type() == DataType::S16)
+ {
+ // Common check for correctness of quant params
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ for (size_t channel = 0; channel < alpha()->zero_points().size(); ++channel)
{
- LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && alpha()->zero_point() == 0 &&
- output()->zero_point() == 0);
+ LUCI_INTERPRETER_CHECK(alpha()->zero_points()[channel] == 0);
}
- double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale();
- quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha);
+    // Prelu-specific checks for CWQ (channel-wise quantized) alpha
+ LUCI_INTERPRETER_CHECK(alpha()->quantized_dimension() == alpha()->shape().num_dims() - 1);
+ LUCI_INTERPRETER_CHECK(static_cast<int32_t>(alpha()->scales().size()) ==
+ alpha()->shape().dim(alpha()->quantized_dimension()));
+ LUCI_INTERPRETER_CHECK(alpha()->shape().num_elements() ==
+ input()->shape().dim(input()->shape().num_dims() - 1));
+
+    // All dimensions of alpha except the last one must be of size 1
+ for (int dim = 0; dim < alpha()->shape().num_dims() - 1; ++dim)
+ {
+ LUCI_INTERPRETER_CHECK(alpha()->shape().dim(dim) == 1);
+ }
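+
+    // For example, with an input of shape {1, H, W, C} a valid alpha has shape
+    // {1, 1, C} (or simply {C}), is quantized along its last dimension, and carries
+    // exactly C scales, all with zero points of 0.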
+
+ std::vector<double> real_multipliers =
+ getQuantizedConvolutionMultiplers(input()->scale(), alpha()->scales(), output()->scale());
+
+ _alpha_multipliers = quantizeMultipliers(real_multipliers);
+
double identity_multiplier = input()->scale() / output()->scale();
quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
}
if (input()->shape() != alpha()->shape())
{
tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
- getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()),
- getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()),
- PreluFunc);
+ getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()),
+ getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()),
+ PreluFunc);
}
else
{
op_params.output_offset = output()->zero_point();
op_params.output_shift_1 = _output_shift_identity;
op_params.output_multiplier_1 = _output_multiplier_identity;
- op_params.output_shift_2 = _output_shift_alpha;
- op_params.output_multiplier_2 = _output_multiplier_alpha;
+ op_params.output_shift_2 = _alpha_multipliers[0].shift;
+ op_params.output_multiplier_2 = _alpha_multipliers[0].multiplier;
if (input()->shape() != alpha()->shape())
{
tflite::reference_ops::BroadcastPrelu4DSlow(
- op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(alpha()), getTensorData<uint8_t>(alpha()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
+ getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
}
else
{
- tflite::reference_ops::Prelu<uint8_t>(op_params, getTensorShape(input()),
- getTensorData<uint8_t>(input()), getTensorShape(alpha()),
- getTensorData<uint8_t>(alpha()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ tflite::reference_ops::Prelu<uint8_t>(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
+ getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
}
}
-void Prelu::evalQuantizedS16() const
+static inline int16_t evalElemS16Prelu(int16_t input_val, int16_t alpha_val,
+ const ChannelQuantMultipliers &identity_mult,
+ const ChannelQuantMultipliers &alpha_mult)
{
constexpr int32_t quantized_min = std::numeric_limits<int16_t>::min();
constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max();
- auto fn = [this, quantized_min, quantized_max](int16_t input_val, int16_t alpha_val) {
- const int32_t output_val =
- input_val >= 0
- ? tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier_identity,
- _output_shift_identity)
- : tflite::MultiplyByQuantizedMultiplier(input_val * alpha_val, _output_multiplier_alpha,
- _output_shift_alpha);
- const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val));
- return static_cast<int16_t>(clamped_output);
- };
-
- BinaryOpBroadcastSlow(getTensorShape(input()), getTensorData<int16_t>(input()),
- getTensorShape(alpha()), getTensorData<int16_t>(alpha()),
- getTensorShape(output()), getTensorData<int16_t>(output()), fn);
+ const int32_t output_val =
+ input_val >= 0 ? tflite::MultiplyByQuantizedMultiplier(input_val, identity_mult.multiplier,
+ identity_mult.shift)
+ : tflite::MultiplyByQuantizedMultiplier(input_val * alpha_val,
+ alpha_mult.multiplier, alpha_mult.shift);
+ const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val));
+  return static_cast<int16_t>(clamped_output);
+}
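+
+// Taken together, the helper above yields real_output ~= real_input for non-negative
+// inputs and real_output ~= real_input * real_alpha_c for negative ones: the identity
+// multiplier folds s_in / s_out and each per-channel alpha multiplier folds
+// s_in * s_alpha_c / s_out, mirroring the U8 path set up in configure().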
+
+void Prelu::evalQuantizedS16() const
+{
+ // Note that this kernel assumes alpha is CWQ
+ tflite::RuntimeShape input_shape = getTensorShape(input());
+ const int16_t *input_data = input()->data<int16_t>();
+ const int16_t *alpha_data = alpha()->data<int16_t>();
+ int16_t *output_data = output()->data<int16_t>();
+
+ const ChannelQuantMultipliers pos_mult{_output_shift_identity, _output_multiplier_identity};
+
+ const int last_dim = input()->shape().num_dims() - 1;
+
+ int32_t outer_dims_size = 1;
+ for (int i = 0; i < last_dim; ++i)
+ outer_dims_size *= input_shape.Dims(i);
+ int32_t quant_dim_size = input_shape.Dims(last_dim);
+
+ for (int32_t outer_dims = 0; outer_dims < outer_dims_size; ++outer_dims)
+ for (int32_t quant_channel = 0; quant_channel < quant_dim_size; ++quant_channel)
+ {
+ const ChannelQuantMultipliers &neg_mult = _alpha_multipliers[quant_channel];
+ size_t offset = static_cast<size_t>(outer_dims) * static_cast<size_t>(quant_dim_size);
+ offset += quant_channel;
+
+ output_data[offset] =
+ evalElemS16Prelu(input_data[offset], alpha_data[quant_channel], pos_mult, neg_mult);
+ }
}
} // namespace kernels
#define LUCI_INTERPRETER_KERNELS_PRELU_H
#include "core/Kernel.h"
+#include <vector>
namespace luci_interpreter
{
namespace kernels
{
+class ChannelQuantMultipliers;
+
class Prelu : public Kernel
{
public:
Prelu(const Tensor *input, const Tensor *alpha, Tensor *output);
+ ~Prelu();
+
const Tensor *input() const { return _inputs[0]; }
const Tensor *alpha() const { return _inputs[1]; }
Tensor *output() const { return _outputs[0]; }
void evalQuantizedS16() const;
private:
- int32_t _output_multiplier_alpha = 0;
- int32_t _output_shift_alpha = 0;
+ std::vector<ChannelQuantMultipliers> _alpha_multipliers;
+  // TODO Merge these two fields into a single ChannelQuantMultipliers object
int32_t _output_multiplier_identity = 0;
int32_t _output_shift_identity = 0;
};
/*output_shape=*/{2, 3},
/*input_data=*/
{
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -1.0f, -2.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
},
/*alpha_data=*/
{
- 0.0f, 0.5f, 0.1f, // Row 1
- 0.0f, 0.5f, 0.1f, // Row 2
+ 0.0f, 0.5f, 0.1f, // Row 1
+ 0.0f, 0.5f, 0.1f, // Row 2
},
/*output_data=*/
{
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -0.5f, -0.2f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -0.5f, -0.2f, // Row 2
});
SUCCEED();
/*output_shape=*/{1, 2, 2, 3},
/*input_data=*/
{
- 0.0f, 0.0f, 0.0f, // Row 1, Column 1
- 1.0f, 1.0f, 1.0f, // Row 1, Column 2
- -1.0f, -1.0f, -1.0f, // Row 2, Column 1
- -2.0f, -2.0f, -2.0f, // Row 2, Column 2
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 1.0f, 1.0f, 1.0f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -2.0f, -2.0f, -2.0f, // Row 2, Column 2
},
/*alpha_data=*/
{0.0f, 1.0f, 2.0f},
/*output_data=*/
{
- 0.0f, 0.0f, 0.0f, // Row 1, Column 1
- 1.0f, 1.0f, 1.0f, // Row 1, Column 2
- 0.0f, -1.0f, -2.0f, // Row 2, Column 1
- 0.0f, -2.0f, -4.0f, // Row 2, Column 2
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 1.0f, 1.0f, 1.0f, // Row 1, Column 2
+ 0.0f, -1.0f, -2.0f, // Row 2, Column 1
+ 0.0f, -2.0f, -4.0f, // Row 2, Column 2
});
SUCCEED();
float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first,
- quant_param.second, input_data);
- Tensor alpha_tensor = makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first,
- quant_param.second, alpha_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, input_data);
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
TEST(PreluTest, Uint8Broadcast)
{
std::vector<float> input_data{
- 0.0f, 0.0f, 0.0f, // Row 1, Column 1
- 0.5f, 0.5f, 0.5f, // Row 1, Column 2
- -1.0f, -1.0f, -1.0f, // Row 2, Column 1
- -0.25f, -0.25f, -0.25f, // Row 2, Column 2
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -0.25f, -0.25f, -0.25f, // Row 2, Column 2
};
std::vector<float> alpha_data{0.0f, 0.5f, -0.5f};
std::vector<float> ref_output_data{
- 0.0f, 0.0f, 0.0f, // Row 1, Column 1
- 0.5f, 0.5f, 0.5f, // Row 1, Column 2
- 0.0f, -0.5f, 0.5f, // Row 2, Column 1
- 0.0f, -0.125f, 0.125f // Row 2, Column 2
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ 0.0f, -0.5f, 0.5f, // Row 2, Column 1
+ 0.0f, -0.125f, 0.125f // Row 2, Column 2
};
std::vector<float> ref_quant_output_data{
- 128, 128, 128, // Row 1, Column 1
- 192, 192, 192, // Row 1, Column 2
- 128, 64, 192, // Row 2, Column 1
- 128, 112, 144 // Row 2, Column 2
+ 128, 128, 128, // Row 1, Column 1
+ 192, 192, 192, // Row 1, Column 2
+ 128, 64, 192, // Row 2, Column 1
+ 128, 112, 144 // Row 2, Column 2
};
float kQuantizedTolerance = 2 * (1. / 256);
const float kMin = -1;
const float kMax = 127.f / 128.f;
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 3}, quant_param.first,
- quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 2, 3}, quant_param.first, quant_param.second, input_data);
Tensor alpha_tensor =
- makeInputTensor<DataType::U8>({1, 1, 3}, quant_param.first, quant_param.second, alpha_data);
+ makeInputTensor<DataType::U8>({1, 1, 3}, quant_param.first, quant_param.second, alpha_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
::testing::ElementsAreArray(ref_quant_output_data));
}
-TEST(PreluTest, SInt16Simple)
+TEST(PreluTest, SInt16_LWQ_NEG)
{
- std::vector<float> input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f};
- std::vector<float> alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f};
- std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f};
+  // Rewrite this test once layer-wise quantization for sint16 is supported
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data);
Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+ Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PreluTest, SInt16_CWQ_Simple)
+{
+ std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
+ std::vector<float> alpha_data{0.5f, 0.25f};
+ std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};
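+  // With shape {1, 1, 3, 2} the channels alternate, so channel 0 uses alpha 0.5 and channel 1
+  // uses alpha 0.25, e.g. -0.8 * 0.5 = -0.4 and -0.7 * 0.25 = -0.175.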
+
+ std::vector<float> alpha_scales{0.05f, 0.025f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data);
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);
+
Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
kernel.configure();
kernel.execute();
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 3, 1}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(PreluTest, SInt16_CWQ_spatial_alpha_NEG)
+{
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
+
+ std::vector<float> alpha_scales{0.25f, 0.05f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3, alpha_data);
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+
+ Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PreluTest, SInt16_CWQ_wrong_dim_quant_NEG)
+{
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
+
+ std::vector<float> alpha_scales{0.25f};
+ std::vector<int32_t> zerop{0};
+ Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1, alpha_data);
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+
+ Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PreluTest, SInt16_CWQ_uneven_shape1)
+{
+ std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
+ std::vector<float> alpha_data{0.5f, 0.25f};
+ std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};
+
+ std::vector<float> alpha_scales{0.05f, 0.025f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2, alpha_data);
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);
+
+ Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
-TEST(PreluTest, SInt16Broadcast)
+TEST(PreluTest, SInt16_CWQ_uneven_shape2)
{
std::vector<float> input_data{
- 0.0f, 0.0f, 0.0f, // Row 1, Column 1
- 0.5f, 0.5f, 0.5f, // Row 1, Column 2
- -1.0f, -1.0f, -1.0f, // Row 2, Column 1
- -0.25f, -0.25f, -0.25f, // Row 2, Column 2
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -0.25f, -0.25f, -0.25f, // Row 2, Column 2
};
std::vector<float> alpha_data{0.0f, 0.5f, -0.5f};
std::vector<float> ref_output_data{
- 0.0f, 0.0f, 0.0f, // Row 1, Column 1
- 0.5f, 0.5f, 0.5f, // Row 1, Column 2
- 0.0f, -0.5f, 0.5f, // Row 2, Column 1
- 0.0f, -0.125f, 0.125f // Row 2, Column 2
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ 0.0f, -0.5f, 0.5f, // Row 2, Column 1
+ 0.0f, -0.125f, 0.125f // Row 2, Column 2
};
+ std::vector<float> alpha_scales{1.f, 0.05f, 0.1f};
+ std::vector<int32_t> zerop{0, 0, 0};
Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data);
- Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 3}, 0.1, 0, alpha_data);
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3, alpha_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.001, 0);
Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
+TEST(PreluTest, Input_Output_U8_CWQ_NEG)
+{
+ std::vector<float> scales{1.f, 1.f};
+ std::vector<int32_t> zerop{0, 0};
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data);
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data);
+ Tensor output_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data);
+
+ Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PreluTest, Input_Output_S16_CWQ_NEG)
+{
+ std::vector<float> scales{1.f, 1.f};
+ std::vector<int32_t> zerop{0, 0};
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data);
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data);
+ Tensor output_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data);
+
+ Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PreluTest, Mixing_U8_S16_NEG)
+{
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data);
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data);
+ Tensor output_tensor = makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data);
+
+ Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
params.output_shift = _output_shift;
params.quantized_activation_min =
- std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
+ std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());
tflite::optimized_ops::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
{
const int32_t input_val = input_data[i];
int32_t output_val =
- tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift);
+ tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift);
output_val = std::max(output_val, output_min);
output_val = std::min(output_val, output_max);
output_data[i] = static_cast<int16_t>(output_val);
TEST(ReluTest, FloatSimple)
{
std::vector<float> input_data{
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -1.0f, -2.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
};
std::vector<float> ref_output_data{
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, 0.0f, 0.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, 0.0f, 0.0f, // Row 2
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input_data);
TEST(ReluTest, Uint8Quantized)
{
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 7, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
};
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float f_min = (-128.0 / 128.0) * 8;
const float f_max = (127.0 / 128.0) * 8;
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first,
- quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Relu kernel(&input_tensor, &output_tensor);
TEST(ReluTest, Uint8Requantized)
{
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 7, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
};
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float out_max = (255.0 / 256.0) * 8;
std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first,
- quant_input.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first, quant_input.second, input_data);
std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
TEST(ReluTest, SInt16)
{
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 7, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
};
std::vector<float> ref_output_data{
- 0, 0, 2, 4, //
- 3, 0, 7, 1, //
+ 0, 0, 2, 4, //
+ 3, 0, 7, 1, //
};
Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data);
params.output_shift = _output_shift;
params.quantized_activation_min =
- std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
+ std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
params.quantized_activation_max =
- std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()),
- params.output_offset + static_cast<int32>(roundf(6.f / output()->scale())));
+ std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()),
+ params.output_offset + static_cast<int32>(roundf(6.f / output()->scale())));
tflite::optimized_ops::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
getTensorShape(output()), getTensorData<uint8_t>(output()));
TEST(Relu6Test, FloatSimple)
{
std::vector<float> input_data{
- 0.0f, 1.0f, 3.0f, // Row 1
- 7.0f, -1.0f, -2.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 7.0f, -1.0f, -2.0f, // Row 2
};
std::vector<float> ref_output_data{
- 0.0f, 1.0f, 3.0f, // Row 1
- 6.0f, 0.0f, 0.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 6.0f, 0.0f, 0.0f, // Row 2
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input_data);
const float tolerance = (f_max - f_min) / 255.0;
std::vector<float> input_data{
- 0, -6, 2, 8, //
- -2, 3, 7, 1, //
+ 0, -6, 2, 8, //
+ -2, 3, 7, 1, //
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first,
- quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Relu6 kernel(&input_tensor, &output_tensor);
const float tolerance = (in_max - in_min) / 255.0;
std::vector<float> input_data{
- 0, -6, 2, 8, //
- -2, 3, 7, 1, //
+ 0, -6, 2, 8, //
+ -2, 3, 7, 1, //
};
std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first,
- quant_input.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first, quant_input.second, input_data);
std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
}
Reshape::Reshape(const Tensor *input, const Tensor *shape, Tensor *output)
- : Kernel({input, shape}, {output})
+ : Kernel({input, shape}, {output})
{
}
ResizeBilinear::ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output,
const ResizeBilinearParams ¶ms)
- : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params)
+ : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params)
{
}
{
case DataType::FLOAT32:
tflite::optimized_ops::ResizeBilinear(
- op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
- getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
+ op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
break;
case DataType::U8:
tflite::optimized_ops::ResizeBilinear(
- op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(size()), getTensorData<int32_t>(size()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
break;
default:
throw std::runtime_error("Unsupported type.");
{
Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
{
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
},
{3, 3},
{
- 3, 5, 6, //
- 7, 9, 10, //
- 9, 11, 12, //
- 4, 8, 10, //
- 8, 12, 14, //
- 10, 14, 16, //
+ 3, 5, 6, //
+ 7, 9, 10, //
+ 9, 11, 12, //
+ 4, 8, 10, //
+ 8, 12, 14, //
+ 10, 14, 16, //
},
false, false);
SUCCEED();
{
Check<float>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
{
- 1, 2, //
- 3, 4, //
- 1, 2, //
- 3, 4 //
+ 1, 2, //
+ 3, 4, //
+ 1, 2, //
+ 3, 4 //
},
{3, 3},
{
- 1, 1.5, 2, //
- 2, 2.5, 3, //
- 3, 3.5, 4, //
- 1, 1.5, 2, //
- 2, 2.5, 3, //
- 3, 3.5, 4, //
+ 1, 1.5, 2, //
+ 2, 2.5, 3, //
+ 3, 3.5, 4, //
+ 1, 1.5, 2, //
+ 2, 2.5, 3, //
+ 3, 3.5, 4, //
},
false, true);
SUCCEED();
{
Check<uint8_t>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
{
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 12, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 12, 16 //
},
{3, 3},
{
- 2, 4, 6, //
- 6, 7, 9, //
- 9, 10, 12, //
- 4, 7, 10, //
- 8, 10, 13, //
- 12, 14, 16, //
+ 2, 4, 6, //
+ 6, 7, 9, //
+ 9, 10, 12, //
+ 4, 7, 10, //
+ 8, 10, 13, //
+ 12, 14, 16, //
},
false, true);
SUCCEED();
TEST(ResizeBilinearTest, InputShapeInvalid_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
});
Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
TEST(ResizeBilinearTest, SizeShapeInvalid_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
});
Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
TEST(ResizeBilinearTest, SizeDimInvalid_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
});
Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
TEST(ResizeBilinearTest, InvalidParams_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
});
Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size,
Tensor *output,
const ResizeNearestNeighborParams ¶ms)
- : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params)
+ : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params)
{
}
{
case DataType::FLOAT32:
tflite::reference_ops::ResizeNearestNeighbor(
- op_params, getTensorShape(input()), getTensorData<int32_t>(input()),
- getTensorShape(size()), getTensorData<int32_t>(size()), getTensorShape(output()),
- getTensorData<int32_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<int32_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<int32_t>(output()));
break;
case DataType::U8:
tflite::optimized_ops::ResizeNearestNeighbor(
- op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(size()), getTensorData<int32_t>(size()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
break;
default:
throw std::runtime_error("Unsupported type.");
bool half_pixel_centers)
{
std::pair<float, int32_t> quant_param =
- quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
- std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+ quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+ std::max(input_data) > 0 ? std::max(input_data) : 0.f);
Tensor input_tensor =
- makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
+ makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
{
Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
{
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
},
{3, 3},
{
- 3, 3, 6, //
- 3, 3, 6, //
- 9, 9, 12, //
- 4, 4, 10, //
- 4, 4, 10, //
- 10, 10, 16, //
+ 3, 3, 6, //
+ 3, 3, 6, //
+ 9, 9, 12, //
+ 4, 4, 10, //
+ 4, 4, 10, //
+ 10, 10, 16, //
},
false, false);
}
{
Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
{
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
},
{3, 3},
{
- 3, 6, 6, //
- 9, 12, 12, //
- 9, 12, 12, //
- 4, 10, 10, //
- 10, 16, 16, //
- 10, 16, 16, //
+ 3, 6, 6, //
+ 9, 12, 12, //
+ 9, 12, 12, //
+ 4, 10, 10, //
+ 10, 16, 16, //
+ 10, 16, 16, //
},
true, false);
}
{
Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
{
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
},
{3, 3},
{
- 3, 6, 6, //
- 9, 12, 12, //
- 9, 12, 12, //
- 4, 10, 10, //
- 10, 16, 16, //
- 10, 16, 16, //
+ 3, 6, 6, //
+ 9, 12, 12, //
+ 9, 12, 12, //
+ 4, 10, 10, //
+ 10, 16, 16, //
+ 10, 16, 16, //
},
false, true);
}
TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
});
Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
});
Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
TEST(ResizeNearestNeighborTest, SizeDimInvalid_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
});
Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Reverse::Reverse(const Tensor *input, const Tensor *axes, Tensor *output)
- : Kernel({input, axes}, {output})
+ : Kernel({input, axes}, {output})
{
}
break;
case DataType::U8:
tflite::reference_ops::Reverse<uint8_t>(
- axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(output()), getTensorData<uint8_t>(output()));
+ axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
break;
default:
throw std::runtime_error("Unsupported output type");
TEST(RsqrtTest, SimpleRsqrt)
{
Check(
- /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
- /*input_data=*/
- {
- 5, 4, 8, 2, //
- 6, 7.5, 9, 0.3, //
- },
- /*output_data=*/
- {
- 0.44721360, 0.5, 0.35355339, 0.70710678, //
- 0.40824829, 0.36514837, 0.33333333, 1.8257419, //
- });
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 5, 4, 8, 2, //
+ 6, 7.5, 9, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.44721360, 0.5, 0.35355339, 0.70710678, //
+ 0.40824829, 0.36514837, 0.33333333, 1.8257419, //
+ });
}
TEST(RsqrtTest, Input_Output_Type_NEG)
const int max_dim = 4;
Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output)
- : Kernel({input, begin, size}, {output})
+ : Kernel({input, begin, size}, {output})
{
}
{
Softmax::Softmax(const Tensor *input, Tensor *output, const SoftmaxParams ¶ms)
- : KernelWithParams<SoftmaxParams>({input}, {output}, params)
+ : KernelWithParams<SoftmaxParams>({input}, {output}, params)
{
}
std::initializer_list<float> output_data)
{
std::pair<float, int32_t> input_quant_param =
- quantizationParams<uint8_t>(std::min<float>(std::min<float>(input_data), 0.f),
- std::max<float>(std::max<float>(input_data), 0.f));
+ quantizationParams<uint8_t>(std::min<float>(std::min<float>(input_data), 0.f),
+ std::max<float>(std::max<float>(input_data), 0.f));
std::pair<float, int32_t> output_quant_param =
- quantizationParams<uint8_t>(std::min<float>(std::min<float>(output_data), 0.f),
- std::max<float>(std::max<float>(output_data), 0.f));
+ quantizationParams<uint8_t>(std::min<float>(std::min<float>(output_data), 0.f),
+ std::max<float>(std::max<float>(output_data), 0.f));
Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
input_quant_param.second, input_data);
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
SoftmaxParams params{};
params.beta = 0.1;
{
Check<TypeParam>({2, 1, 2, 3}, {2, 1, 2, 3},
{
- 5, -9, 8, //
- -7, 2, -4, //
- 1, -2, 9, //
- 3, -6, -1, //
+ 5, -9, 8, //
+ -7, 2, -4, //
+ 1, -2, 9, //
+ 3, -6, -1, //
},
{
- 0.38514, 0.09497, 0.51989, //
- 0.20792, 0.51141, 0.28067, //
- 0.25212, 0.18678, 0.56110, //
- 0.48149, 0.19576, 0.32275, //
+ 0.38514, 0.09497, 0.51989, //
+ 0.20792, 0.51141, 0.28067, //
+ 0.25212, 0.18678, 0.56110, //
+ 0.48149, 0.19576, 0.32275, //
});
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
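+// Only 3D and 4D inputs are handled: a batch dimension, one or two spatial dimensions, and a
+// trailing channel dimension.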
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+
+} // namespace
+
+SpaceToBatchND::SpaceToBatchND(const Tensor *input, const Tensor *block_shape,
+ const Tensor *paddings, Tensor *output)
+ : Kernel({input, block_shape, paddings}, {output})
+{
+}
+
+void SpaceToBatchND::configure()
+{
+ const auto *block_shape_data = block_shape()->data<int32_t>();
+ const auto *paddings_data = paddings()->data<int32_t>();
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ int spatial_dims_num = input()->shape().num_dims() - 2;
+
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+ LUCI_INTERPRETER_CHECK(paddings()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(paddings()->shape().dim(0) == spatial_dims_num);
+ LUCI_INTERPRETER_CHECK(paddings()->shape().dim(1) == 2);
+
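+  // Each padded spatial dimension is divided by its block size and the block factors are folded
+  // into the batch, e.g. input {1, 5, 2, 1} with block {3, 2} and paddings {1, 0, 2, 0} (the
+  // Simple test below) yields {1 * 3 * 2, (5 + 1) / 3, (2 + 2) / 2, 1} = {6, 2, 2, 1}.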
+ Shape output_shape = Shape(input()->shape().num_dims());
+ int output_batch_size = input()->shape().dim(0);
+ for (int i = 0; i < spatial_dims_num; ++i)
+ {
+ int final_dim_size =
+ (input()->shape().dim(i + 1) + paddings_data[i * 2] + paddings_data[i * 2 + 1]);
+ LUCI_INTERPRETER_CHECK(final_dim_size % block_shape_data[i] == 0);
+ output_shape.dim(i + 1) = final_dim_size / block_shape_data[i];
+ output_batch_size = output_batch_size * block_shape_data[i];
+ }
+ output_shape.dim(0) = output_batch_size;
+ output_shape.dim(input()->shape().num_dims() - 1) =
+ input()->shape().dim(input()->shape().num_dims() - 1);
+ output()->resize(output_shape);
+}
+
+void SpaceToBatchND::execute() const
+{
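+  // output_offset is the value written into padded positions: 0 for float tensors and the
+  // output zero point for quantized uint8 tensors (this is how tflite's SpaceToBatchND
+  // interprets it).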
+  tflite::SpaceToBatchParams op_params;
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+ op_params.output_offset = 0;
+ tflite::optimized_ops::SpaceToBatchND(
+ op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+ getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ op_params.output_offset = output()->zero_point();
+ tflite::optimized_ops::SpaceToBatchND(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+ getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SpaceToBatchND : public Kernel
+{
+public:
+ SpaceToBatchND(const Tensor *input, const Tensor *block_shape, const Tensor *paddings,
+ Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *block_shape() const { return _inputs[1]; }
+ const Tensor *paddings() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> paddings_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
+{
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data);
+ Tensor paddings_tensor = makeInputTensor<DataType::S32>(paddings_shape, paddings_data);
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <>
+void Check<uint8_t>(
+ std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> paddings_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
+{
+ std::pair<float, int32_t> input_quant_param =
+ quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+ Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
+ input_quant_param.second, input_data);
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data);
+ Tensor paddings_tensor = makeInputTensor<DataType::S32>(paddings_shape, paddings_data);
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, input_quant_param.first, input_quant_param.second);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T> class SpaceToBatchNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(SpaceToBatchNDTest, DataTypes);
+
+TYPED_TEST(SpaceToBatchNDTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{1, 5, 2, 1}, /*block_shape_shape=*/{2},
+ /*paddings_shape=*/{2, 2},
+ /*output_shape=*/{6, 2, 2, 1},
+ /*input_data=*/{-1.0, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 1.0},
+ /*block_shape_data=*/{3, 2}, /*paddings_data=*/{1, 0, 2, 0},
+ /*output_data=*/{0, 0, 0, -0.5, 0, 0, 0, 0.6, 0, -1.0, 0, -0.7,
+ 0, 0.2, 0, 0.8, 0, -0.3, 0, -0.9, 0, 0.4, 0, 1.0});
+}
+
+TEST(SpaceToBatchNDTest, Invalid_Shape_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2});
+ Tensor paddings_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0});
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
{
SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams ¶ms)
- : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
+ : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
{
}
{
Split::Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs)
- : Kernel({axis, input}, std::move(outputs))
+ : Kernel({axis, input}, std::move(outputs))
{
}
Check<TypeParam>(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
{
- {1, 2, 3, 4, 5, 6, 7, 8}, //
- {9, 10, 11, 12, 13, 14, 15, 16}, //
+ {1, 2, 3, 4, 5, 6, 7, 8}, //
+ {9, 10, 11, 12, 13, 14, 15, 16}, //
});
Check<TypeParam>(
- /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2},
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, {
- {1, 2, 3, 4, 9, 10, 11, 12}, //
- {5, 6, 7, 8, 13, 14, 15, 16}, //
- });
+ /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 9, 10, 11, 12}, //
+ {5, 6, 7, 8, 13, 14, 15, 16}, //
+ });
Check<TypeParam>(
- /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2},
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, {
- {1, 2, 5, 6, 9, 10, 13, 14}, //
- {3, 4, 7, 8, 11, 12, 15, 16}, //
- });
+ /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 5, 6, 9, 10, 13, 14}, //
+ {3, 4, 7, 8, 11, 12, 15, 16}, //
+ });
Check<TypeParam>(
- /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1},
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, {
- {1, 3, 5, 7, 9, 11, 13, 15}, //
- {2, 4, 6, 8, 10, 12, 14, 16}, //
- });
+ /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 3, 5, 7, 9, 11, 13, 15}, //
+ {2, 4, 6, 8, 10, 12, 14, 16}, //
+ });
}
TYPED_TEST(SplitTest, OneDimensional)
{
Check<TypeParam>(
- /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8},
- {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}});
+ /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8},
+ {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}});
}
TYPED_TEST(SplitTest, NegativeAxis)
{
Check<TypeParam>(
- /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, {
- {1, 2, 3, 4, 5, 6, 7, 8}, //
- {9, 10, 11, 12, 13, 14, 15, 16},
- });
+ /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 5, 6, 7, 8}, //
+ {9, 10, 11, 12, 13, 14, 15, 16},
+ });
}
} // namespace
TEST(SqrtTest, SimpleSqrt)
{
Check(
- /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
- /*input_data=*/
- {
- 0, 8, 2, 4, //
- 3, 7, 10, 0.3, //
- },
- /*output_data=*/
- {
- 0.0, 2.8284271, 1.4142136, 2, //
- 1.7320508, 2.6457513, 3.1622777, 0.54772256, //
- });
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, 8, 2, 4, //
+ 3, 7, 10, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.0, 2.8284271, 1.4142136, 2, //
+ 1.7320508, 2.6457513, 3.1622777, 0.54772256, //
+ });
}
TEST(SqrtTest, Input_Output_Type_NEG)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SquaredDifference::SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void SquaredDifference::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void SquaredDifference::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalSquaredDifference<float>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> inline void SquaredDifference::evalSquaredDifference() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()), [](T x, T y) {
+ const T difference = x - y;
+ return difference * difference;
+ });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
+#define LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SquaredDifference : public Kernel
+{
+public:
+ SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> inline void evalSquaredDifference() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(SquaredDifferenceTest, Float)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1);
+ Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data2);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
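+  // Element-wise (x - y)^2, e.g. (1.0 - (-1.0))^2 = 4.0 and (-1.44 - (-1.43))^2 = 0.0001.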
+ std::vector<float> ref_output_data{4.0, 0.0, 4.0, 1.0, 1.0, 0.0001};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(SquaredDifferenceTest, FloatBroadcast)
+{
+ Shape input_shape1{3, 1, 2};
+ Shape input_shape2{1};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{1.0};
+ Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1);
+ Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.0, 1.0, 4.0, 100.0, 9.0, 5.9536};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
{
Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams ¶ms)
- : KernelWithParams<SqueezeParams>({input}, {output}, params)
+ : KernelWithParams<SqueezeParams>({input}, {output}, params)
{
}
TYPED_TEST(SqueezeTest, TotalTest)
{
Check<TypeParam>(
- /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24},
- /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
- /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
- {-1, 0});
+ /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+ /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+ {-1, 0});
}
} // namespace
StridedSlice::StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end,
const Tensor *strides, Tensor *output, const StridedSliceParams ¶ms)
- : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params)
+ : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params)
{
}
assert(stride != 0);
int32_t begin = ::tflite::strided_slice::StartForAxis(op_params, getTensorShape(input()), idx);
int32_t end =
- ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin);
+ ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin);
const bool shrink_axis = params().shrink_axis_mask & (1 << idx);
if (shrink_axis)
{
Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams ¶ms)
- : KernelWithParams<SubParams>({input1, input2}, {output}, params)
+ : KernelWithParams<SubParams>({input1, input2}, {output}, params)
{
}
params.float_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), ¶ms);
+ getTensorShape(input1()), getTensorShape(input2()), ¶ms);
if (need_broadcast)
{
tflite::reference_ops::BroadcastSubSlow(
- params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
- getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
}
else
{
params.quantized_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), ¶ms);
+ getTensorShape(input1()), getTensorShape(input2()), ¶ms);
if (need_broadcast)
{
tflite::reference_ops::BroadcastSubSlow(
- params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
- getTensorShape(input2()), getTensorData<uint8_t>(input2()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
}
else
{
vector<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
vector<vector<int32_t>> output_shapes = {{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
vector<vector<float>> output_data = {
- {-0.5f, 2.0f, 0.1f, 1.8f, -1.3f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, -0.1f, -0.4f,
- 0.6f, -1.4f, 1.2f, -1.6f, -0.2f, -2.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
- -1.8f, -0.3f, -1.2f, -0.5f, -2.6f, -0.9f, 0.5f, -2.5f, 1.1f, -2.7f, -0.3f, -3.0f},
- {-0.5f, 2.0f, 1.3f, 0.0f, -0.2f, -2.0f, 1.0f, 2.5f, -1.2f, -0.5f, -0.3f, -3.0f},
- {-0.5f, 2.1f, -0.6f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
- 0.6f, -1.3f, 0.5f, -1.4f, 1.2f, -0.7f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
- -2.1f, -0.5f, -2.6f, -1.0f, -2.5f, -0.9f, 0.2f, -2.7f, -0.3f, -3.0f, -0.2f, -3.0f},
- {-0.5f, 2.1f, 0.6f, 0.2f, 1.2f, -0.7f, 0.7f, 2.3f, -2.6f, -1.0f, -0.2f, -3.0f}};
+ {-0.5f, 2.0f, 0.1f, 1.8f, -1.3f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, -0.1f, -0.4f,
+ 0.6f, -1.4f, 1.2f, -1.6f, -0.2f, -2.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
+ -1.8f, -0.3f, -1.2f, -0.5f, -2.6f, -0.9f, 0.5f, -2.5f, 1.1f, -2.7f, -0.3f, -3.0f},
+ {-0.5f, 2.0f, 1.3f, 0.0f, -0.2f, -2.0f, 1.0f, 2.5f, -1.2f, -0.5f, -0.3f, -3.0f},
+ {-0.5f, 2.1f, -0.6f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
+ 0.6f, -1.3f, 0.5f, -1.4f, 1.2f, -0.7f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
+ -2.1f, -0.5f, -2.6f, -1.0f, -2.5f, -0.9f, 0.2f, -2.7f, -0.3f, -3.0f, -0.2f, -3.0f},
+ {-0.5f, 2.1f, 0.6f, 0.2f, 1.2f, -0.7f, 0.7f, 2.3f, -2.6f, -1.0f, -0.2f, -3.0f}};
float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
for (size_t i = 0; i < output_data.size(); ++i)
{
Tensor input1_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
+ makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
Tensor input2_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
quant_param.second, test_data);
Tensor output_tensor =
- makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
SubParams params{};
params.activation = Activation::NONE;
Tensor input1_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
quant_param.second, test_data);
Tensor input2_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
+ makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
Tensor output_tensor =
- makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
SubParams params{};
params.activation = Activation::NONE;
vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
vector<vector<int32_t>> output_shapes{{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
vector<vector<float>> test_outputs = {
- {0.0f, 2.0f, 0.1f, 1.8f, 0.0f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, 0.0f, 0.0f,
- 0.6f, 0.0f, 1.2f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
- 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.0f, 1.1f, 0.0f, 0.0f, 0.0f},
- {0.0f, 2.0f, 1.3f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f},
- {0.0f, 2.1f, 0.0f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
- 0.6f, 0.0f, 0.5f, 0.0f, 1.2f, 0.0f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
- 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.2f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
- {0.0f, 2.1f, 0.6f, 0.2f, 1.2f, 0.0f, 0.7f, 2.3f, 0.0f, 0.0f, 0.0f, 0.0f}};
+ {0.0f, 2.0f, 0.1f, 1.8f, 0.0f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, 0.0f, 0.0f,
+ 0.6f, 0.0f, 1.2f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
+ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.0f, 1.1f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.0f, 1.3f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.1f, 0.0f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
+ 0.6f, 0.0f, 0.5f, 0.0f, 1.2f, 0.0f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
+ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.2f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.1f, 0.6f, 0.2f, 1.2f, 0.0f, 0.7f, 2.3f, 0.0f, 0.0f, 0.0f, 0.0f}};
vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
- << "With shape number " << i;
+ << "With shape number " << i;
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
}
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 10, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
kernel.execute();
std::vector<float> ref_output_data{
- 0, -0.9999877, 0.9640275, 0.999329, //
- 0.99505475, -0.9640275, 1, 0.7615941, //
+ 0, -0.9999877, 0.9640275, 0.999329, //
+ 0.99505475, -0.9640275, 1, 0.7615941, //
};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
}
std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(8 * kMin, 8 * kMax);
std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(kMin, kMax);
std::vector<float> input_data{
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
};
Tensor input_tensor = makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first,
input_quant_param.second, input_data);
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
Tanh kernel(&input_tensor, &output_tensor);
kernel.configure();
kernel.execute();
std::vector<float> ref_output_data{
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
};
std::vector<int32_t> ref_output_shape{2, 6, 4, 1};
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data, kTanhTolerance));
TEST(TanhTest, InputTypeInvalid_NEG)
{
std::vector<int64_t> input_data{
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
};
Tensor input_tensor = makeInputTensor<DataType::S64>({2, 6, 4, 1}, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
TEST(TanhTest, InputOutputMismatch_NEG)
{
std::vector<float> input_data{
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 6, 4, 1}, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8);
float scale = tensor.scales()[channel];
size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
std::vector<float> part_dequantized_data =
- dequantize(data.data() + offset, inner_dims_size, scale, 0);
+ dequantize(data.data() + offset, inner_dims_size, scale, 0);
dequantized_data.insert(dequantized_data.end(), part_dequantized_data.begin(),
part_dequantized_data.end());
}
using NativeT = typename DataTypeImpl<DT>::Type;
Tensor tensor(DT, shape, {{scale}, {zero_point}}, "");
std::vector<NativeT> quantized_data =
- quantize<NativeT>(data.data(), data.size(), scale, zero_point);
+ quantize<NativeT>(data.data(), data.size(), scale, zero_point);
tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
return tensor;
}
float scale = scales[channel];
size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
std::vector<NativeT> part_quantized_data =
- quantize<NativeT>(data.data() + offset, inner_dims_size, scale, zero_point);
+ quantize<NativeT>(data.data() + offset, inner_dims_size, scale, zero_point);
quantized_data.insert(quantized_data.end(), part_quantized_data.begin(),
part_quantized_data.end());
}
{
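// Affine quantization of one element: q = clamp(round(f / scale + zero_point)) into [q_min, q_max].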
const auto &f = data[i];
q.push_back(static_cast<T>(
- std::max<float>(q_min, std::min<float>(q_max, std::round(zero_point + (f / scale))))));
+ std::max<float>(q_min, std::min<float>(q_max, std::round(zero_point + (f / scale))))));
}
return q;
}
const float zero_point_from_max_error = std::abs(qmax_double) + std::abs(f_max / scale);
const float zero_point_double = zero_point_from_min_error < zero_point_from_max_error
- ? zero_point_from_min
- : zero_point_from_max;
+ ? zero_point_from_min
+ : zero_point_from_max;
// Now we need to nudge the zero point to be an integer
// (our zero points are integer, and this is motivated by the requirement
{
Transpose::Transpose(const Tensor *input, const Tensor *perm, Tensor *output)
- : Kernel({input, perm}, {output})
+ : Kernel({input, perm}, {output})
{
}
TYPED_TEST(TransposeTest, Large4D)
{
Check<TypeParam>(
- /*input_shape=*/{2, 3, 4, 5}, /*perm_shape=*/{4}, /*output_shape=*/{4, 2, 3, 5},
- /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
- /*perm_data=*/{2, 0, 1, 3},
- /*output_data=*/{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44,
- 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104,
- 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49,
- 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
- 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54,
- 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
- 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59,
- 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119});
+ /*input_shape=*/{2, 3, 4, 5}, /*perm_shape=*/{4}, /*output_shape=*/{4, 2, 3, 5},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
+ /*perm_data=*/{2, 0, 1, 3},
+ /*output_data=*/{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44,
+ 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104,
+ 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49,
+ 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
+ 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54,
+ 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
+ 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59,
+ 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119});
}
TYPED_TEST(TransposeTest, Large2D)
{
Check<TypeParam>(
- /*input_shape=*/{10, 12}, /*perm_shape=*/{2}, /*output_shape=*/{12, 10},
- /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
- /*perm_data=*/{1, 0},
- /*output_data=*/{
- 0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49, 61, 73, 85, 97, 109,
- 2, 14, 26, 38, 50, 62, 74, 86, 98, 110, 3, 15, 27, 39, 51, 63, 75, 87, 99, 111,
- 4, 16, 28, 40, 52, 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113,
- 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55, 67, 79, 91, 103, 115,
- 8, 20, 32, 44, 56, 68, 80, 92, 104, 116, 9, 21, 33, 45, 57, 69, 81, 93, 105, 117,
- 10, 22, 34, 46, 58, 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119});
+ /*input_shape=*/{10, 12}, /*perm_shape=*/{2}, /*output_shape=*/{12, 10},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
+ /*perm_data=*/{1, 0},
+ /*output_data=*/{0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49,
+ 61, 73, 85, 97, 109, 2, 14, 26, 38, 50, 62, 74, 86, 98, 110,
+ 3, 15, 27, 39, 51, 63, 75, 87, 99, 111, 4, 16, 28, 40, 52,
+ 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113,
+ 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55,
+ 67, 79, 91, 103, 115, 8, 20, 32, 44, 56, 68, 80, 92, 104, 116,
+ 9, 21, 33, 45, 57, 69, 81, 93, 105, 117, 10, 22, 34, 46, 58,
+ 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119});
}
} // namespace
TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
const Tensor *bias, Tensor *output, const TransposeConvParams &params)
- : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params)
+ : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params)
{
}
const int32_t output_width = out_shape.dim(2);
const int32_t unused_output_height =
- computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
+ computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
const int32_t unused_output_width =
- computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
+ computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
_padding_height =
- computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
+ computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
_padding_width =
- computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
+ computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
{
DataType scratch_data_type =
- input()->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ input()->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
_scratch_tensor =
- std::make_unique<Tensor>(scratch_data_type, output()->shape(), AffineQuantization{}, "");
+ std::make_unique<Tensor>(scratch_data_type, output()->shape(), AffineQuantization{}, "");
const std::vector<double> real_multipliers =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
_quant_multipliers = quantizeMultipliers(real_multipliers);
}
for (int32_t out_c = 0; out_c < output_depth; ++out_c)
{
const uint8_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
const uint8_t filter_val =
- filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
- static_cast<int32_t>(input_val - input()->zero_point()) *
- static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
+ static_cast<int32_t>(input_val - input()->zero_point()) *
+ static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
}
}
}
}
int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
- acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
scaled_acc += output()->zero_point();
scaled_acc = std::max(scaled_acc, activation_min);
for (int32_t out_c = 0; out_c < output_depth; ++out_c)
{
const int16_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
const int16_t filter_val =
- filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
- static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+ static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
}
}
}
acc += bias_data[out_c];
}
int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
- acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
scaled_acc = std::max(scaled_acc, activation_min);
scaled_acc = std::min(scaled_acc, activation_max);
{
constexpr DataType element_type = getElementType<T>();
Tensor output_shape_tensor =
- makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data);
+ makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data);
Tensor weight_tensor = makeInputTensor<element_type>(weight_shape, weight_data);
Tensor input_data_tensor = makeInputTensor<element_type>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(element_type);
TEST(TransposeConvTest, FloatSimple)
{
Check<float, float>(
- /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
- /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
- /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
- /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
- /*bias_data=*/{},
- /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
- /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*bias_data=*/{},
+ /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
SUCCEED();
}
TEST(TransposeConvTest, FloatTwoFiltersTest)
{
Check<float, float>(
- /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
- /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
- /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
- /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
- 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
- /*bias_data=*/{},
- /*output_data=*/
- {184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760},
- /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+ /*bias_data=*/{},
+ /*output_data=*/
+ {184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
SUCCEED();
}
TEST(TransposeConvTest, SimpleBiasTest)
{
Check<float, float>(
- /*output_shape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1},
- /*input_shape=*/{1, 2, 2, 1},
- /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 5, 5, 2},
- /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18},
- /*input_data=*/{1, 2, 3, 4},
- /*bias_data=*/{3, 4},
- /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21,
- 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34,
- 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76},
- /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2);
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1},
+ /*input_shape=*/{1, 2, 2, 1},
+ /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 5, 5, 2},
+ /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18},
+ /*input_data=*/{1, 2, 3, 4},
+ /*bias_data=*/{3, 4},
+ /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21,
+ 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34,
+ 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76},
+ /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2);
SUCCEED();
}
std::vector<float> bias_data{3, 4};
std::vector<int32_t> output_shape_data{1, 5, 5, 2};
std::vector<float> ref_output_data{
- 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
- 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
- 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
- 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
- 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
};
// Choose quantization parameters carefully.
auto filter_quant = quantizationParams<uint8_t>(-24.0, 39.75); // s = 1 / 4, zp = 96
auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
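// For reference: assuming quantizationParams uses the usual affine mapping for uint8
// (scale = (max - min) / 255, zero_point = round(-min / scale)), the ranges above yield
// s = 0.25, zp = 96 for the filter and s = 1.0, zp = 64 for the output, matching the comments.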
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first,
- input_quant.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first, input_quant.second, input_data);
Tensor filter_tensor = makeInputTensor<DataType::U8>({2, 3, 3, 1}, filter_quant.first,
filter_quant.second, filter_data);
Tensor bias_tensor =
- makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first, 0, bias_data);
+ makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first, 0, bias_data);
Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
std::vector<float> bias_data{3, 4};
std::vector<int32_t> output_shape_data{1, 5, 5, 2};
std::vector<float> ref_output_data{
- 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
- 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
- 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
- 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
- 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
};
// Choose quantization parameters carefully.
bias_scales.push_back(filter_quant_params[i].first * input_quant.first);
std::vector<int32_t> zerop(output_channels, 0);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first,
- input_quant.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first, input_quant.second, input_data);
Tensor filter_tensor = makeInputTensor<DataType::U8>({output_channels, 3, 3, 1}, filter_scales,
filter_zerops, 0, filter_data);
Tensor bias_tensor =
- makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data);
+ makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data);
Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
std::vector<float> bias_data{3, 4};
std::vector<int32_t> output_shape_data{1, 5, 5, 2};
std::vector<float> ref_output_data{
- 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
- 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
- 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
- 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
- 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
};
Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data);
std::vector<float> bias_data{3, 4};
std::vector<float> ref_output_data{
- 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
- 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
- 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
- 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
- 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
};
const float input_scale = 0.25;
Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data);
Tensor filter_tensor =
- makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, filter_data);
+ makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, filter_data);
Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data);
Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
{
Unpack::Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params)
- : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
+ : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
{
}
TYPED_TEST(UnpackTest, FiveDimensionsTwoOutputs)
{
Check<TypeParam>(
- /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1},
- /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
- /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}},
- /*exp_output_data=*/
- {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}});
+ /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}},
+ /*exp_output_data=*/
+ {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}});
}
TYPED_TEST(UnpackTest, VectorToScalar)
return input_product_scale / static_cast<double>(output_scale);
}
+// TODO Rename getQuantizedConvolutionMultiplers to something more general;
+//      it is used for non-conv operators too.
inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
const std::vector<float> &filter_scale,
float output_scale)
for (size_t i = 0; i < n; ++i)
{
effective_output_scales.push_back(
- getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
+ getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
}
return effective_output_scales;
}
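// BroadcastableWrapper lets a single-element vector be indexed as if it had any length:
// _stride is 0 for a size-1 vector (every index reads element 0) and 1 otherwise.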
BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}
T operator[](int idx) { return _v[idx * _stride]; }
+
private:
const std::vector<T> &_v;
int _stride;
// Build with the tensors in 'tensor_list'.
explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
- : VectorOfTensors<uint8_t, is_const>(tensor_list)
+ : VectorOfTensors<uint8_t, is_const>(tensor_list)
{
for (TensorT *tensor : tensor_list)
{
-nnas_find_package(GTest REQUIRED)
-
set(SOURCES
GraphLoader.h
GraphLoader.cpp
PUBLIC luci_lang luci_interpreter_core
PRIVATE luci_interpreter_kernels nncc_common)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
set(TEST_SOURCES KernelBuilder.test.cpp)
GTest_AddTest(luci_interpreter_loader_test ${TEST_SOURCES})
} // namespace
GraphLoader::GraphLoader(
- const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
- _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
+ const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
+ _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
{
}
#include "kernels/Add.h"
#include "kernels/ArgMax.h"
#include "kernels/AveragePool2D.h"
+#include "kernels/BatchToSpaceND.h"
#include "kernels/Concatenation.h"
#include "kernels/Conv2D.h"
#include "kernels/DepthToSpace.h"
#include "kernels/Mean.h"
#include "kernels/Minimum.h"
#include "kernels/Mul.h"
+#include "kernels/Neg.h"
#include "kernels/NotEqual.h"
+#include "kernels/Pack.h"
#include "kernels/Pad.h"
#include "kernels/Pow.h"
#include "kernels/Prelu.h"
#include "kernels/Rsqrt.h"
#include "kernels/Slice.h"
#include "kernels/Softmax.h"
+#include "kernels/SpaceToBatchND.h"
#include "kernels/SpaceToDepth.h"
#include "kernels/Split.h"
#include "kernels/StridedSlice.h"
#include "kernels/Sqrt.h"
-#include "kernels/Sub.h"
+#include "kernels/SquaredDifference.h"
#include "kernels/Squeeze.h"
+#include "kernels/Sub.h"
#include "kernels/Tanh.h"
#include "kernels/Unpack.h"
#include "kernels/Transpose.h"
return runtime_graph;
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleNode *)
+{
+ throw std::invalid_argument("Unsupported operator.");
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleAdd *node)
{
assert(node->arity() == 2);
return std::make_unique<kernels::AveragePool2D>(input, output, params);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleBatchToSpaceND *node)
+{
+ assert(node->arity() == 3);
+
+ const Tensor *input = getInputTensor(node->input());
+ const Tensor *block_shape = getInputTensor(node->block_shape());
+ const Tensor *crops = getInputTensor(node->crops());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleConcatenation *node)
{
std::vector<const Tensor *> inputs(node->numValues());
ConcatenationParams params{};
params.axis = node->axis();
+ params.activation = node->fusedActivationFunction();
return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
}
return std::make_unique<kernels::Mul>(input1, input2, output, params);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleNeg *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->x());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Neg>(input, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleNotEqual *node)
{
assert(node->arity() == 2);
throw std::runtime_error("Output node cannot be executed.");
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CirclePack *node)
+{
+ assert(node->arity() == node->values_count());
+
+ std::vector<const Tensor *> inputs(node->values_count());
+ for (uint32_t i = 0; i < node->values_count(); ++i)
+ {
+ inputs[i] = getInputTensor(node->values(i));
+ }
+ Tensor *output = getOutputTensor(node);
+
+ PackParams params{};
+ params.axis = node->axis();
+ params.values_count = node->values_count();
+
+ return std::make_unique<kernels::Pack>(std::move(inputs), output, params);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CirclePad *node)
{
assert(node->arity() == 2);
return std::make_unique<kernels::Rsqrt>(input, output);
}
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSub *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input1 = getInputTensor(node->x());
- const Tensor *input2 = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- SubParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Sub>(input1, input2, output, params);
-}
-
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSlice *node)
{
assert(node->arity() == 3);
return std::make_unique<kernels::Softmax>(input, output, params);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSpaceToBatchND *node)
+{
+ assert(node->arity() == 3);
+
+ const Tensor *input = getInputTensor(node->input());
+ const Tensor *block_shape = getInputTensor(node->block_shape());
+ const Tensor *paddings = getInputTensor(node->paddings());
+
+ Tensor *output = getOutputTensor(node);
+
+  return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSpaceToDepth *node)
{
assert(node->arity() == 1);
return std::make_unique<kernels::Sqrt>(input, output);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSquaredDifference *node)
+{
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = getInputTensor(node->x());
+ const Tensor *input2 = getInputTensor(node->y());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::SquaredDifference>(input1, input2, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqueeze *node)
{
assert(node->arity() == 1);
return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSub *node)
+{
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = getInputTensor(node->x());
+ const Tensor *input2 = getInputTensor(node->y());
+ Tensor *output = getOutputTensor(node);
+
+ SubParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Sub>(input1, input2, output, params);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTanh *node)
{
assert(node->arity() == 1);
{
public:
KernelBuilder(
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
{
}
+ std::unique_ptr<Kernel> visit(const luci::CircleNode *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleAdd *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleArgMax *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleAveragePool2D *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleBatchToSpaceND *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleConcatenation *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleConv2D *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleConst *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleMean *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleMinimum *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleMul *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleNeg *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleNotEqual *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleOutput *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CirclePack *node) override;
std::unique_ptr<Kernel> visit(const luci::CirclePad *node) override;
std::unique_ptr<Kernel> visit(const luci::CirclePow *node) override;
std::unique_ptr<Kernel> visit(const luci::CirclePRelu *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleResizeNearestNeighbor *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleReverseV2 *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleRsqrt *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleSub *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSlice *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSoftmax *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleSpaceToBatchND *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSpaceToDepth *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSplit *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleStridedSlice *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSqrt *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleSquaredDifference *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSqueeze *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleSub *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleTanh *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleTranspose *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleTransposeConv *node) override;
#include <kernels/Mean.h>
#include <kernels/Minimum.h>
#include <kernels/Mul.h>
+#include <kernels/Neg.h>
#include <kernels/NotEqual.h>
#include <kernels/Pad.h>
#include <kernels/Pow.h>
#include <kernels/SpaceToDepth.h>
#include <kernels/Split.h>
#include <kernels/Sqrt.h>
-#include <kernels/Sub.h>
+#include <kernels/SquaredDifference.h>
#include <kernels/Squeeze.h>
#include <kernels/StridedSlice.h>
+#include <kernels/Sub.h>
#include <kernels/Tanh.h>
#include <kernels/Transpose.h>
#include <kernels/TransposeConv.h>
checkTensor(kernel->input(1), input2);
checkTensor(kernel->output(), op);
EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
}
TEST_F(KernelBuilderTest, Conv2D)
EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
}
+TEST_F(KernelBuilderTest, Neg)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleNeg>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Neg>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, NotEqual)
{
auto *x_input = createInputNode();
checkTensor(kernel->output(), op);
}
-TEST_F(KernelBuilderTest, Sub)
+TEST_F(KernelBuilderTest, SquaredDifference)
{
auto *input1 = createInputNode();
auto *input2 = createInputNode();
- auto *op = createNode<luci::CircleSub>();
+ auto *op = createNode<luci::CircleSquaredDifference>();
op->x(input1);
op->y(input2);
- op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
- auto kernel = buildKernel<kernels::Sub>(op);
+ auto kernel = buildKernel<kernels::SquaredDifference>(op);
ASSERT_THAT(kernel, NotNull());
checkTensor(kernel->input1(), input1);
checkTensor(kernel->input2(), input2);
checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
}
TEST_F(KernelBuilderTest, Squeeze)
EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask()));
}
+TEST_F(KernelBuilderTest, Sub)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleSub>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Sub>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
TEST_F(KernelBuilderTest, Tanh)
{
auto *input = createInputNode();
ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
RuntimeToIR &runtime_to_ir,
std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir),
- _node_to_tensor(node_to_tensor)
+ : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir),
+ _node_to_tensor(node_to_tensor)
{
}
--- /dev/null
+/test.local.lst
--- /dev/null
+unset(TEST_DEPS)
+unset(LUCI_PASS_VALUE_TESTS)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+macro(addeval RECIPE PASS_OPTION)
+ list(APPEND LUCI_PASS_VALUE_TESTS ${RECIPE})
+
+ set(CIRCLE_FILE "${RECIPE}.circle")
+ set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${CIRCLE_FILE}")
+
+ set(PASS_CIRCLE_FILE "${RECIPE}.pass.circle")
+ set(PASS_CIRCLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PASS_CIRCLE_FILE}")
+
+ set(DASH_PASS_OPTION "--${PASS_OPTION}")
+
+ # Generate optimized .circle
+ add_custom_command(OUTPUT ${PASS_CIRCLE_OUTPUT_PATH}
+ COMMAND $<TARGET_FILE:circle2circle> ${DASH_PASS_OPTION} ${CIRCLE_PATH} ${PASS_CIRCLE_OUTPUT_PATH}
+ DEPENDS $<TARGET_FILE:circle2circle> ${CIRCLE_PATH}
+ COMMENT "Generate ${PASS_CIRCLE_FILE} with ${DASH_PASS_OPTION}"
+ )
+
+ # depends
+ list(APPEND TEST_DEPS ${PASS_CIRCLE_OUTPUT_PATH})
+
+endmacro(addeval)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+add_custom_target(luci_pass_value_test_files ALL DEPENDS ${TEST_DEPS})
+add_dependencies(luci_pass_value_test_files common_artifacts_deps)
+
+add_test(NAME luci_pass_value_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/eval_driver.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_3_0"
+ "$<TARGET_FILE:luci_eval_driver>"
+ ${LUCI_PASS_VALUE_TESTS}
+)
--- /dev/null
+# luci-pass-value-test
+
+`luci-pass-value-test` validates the execution result values of a tflite model against those of
+the corresponding circle model generated with a specific optimization.
+
+The test proceeds as follows:
+
+Step 0: Use the tflite and circle files in the 'common-artifacts' folder as the source models.
+  - the tflite file is used to generate the reference execution result
+  - the circle file is the source to which the optimization is applied
+
+Step 1: Run circle2circle with the given optimization option to produce the transformed circle.
+  - "modelfile.circle" -> circle2circle -> "modelfile.pass.circle"
+
+Step 2: Run the TFLite interpreter and luci-interpreter for the source tflite and the transformed
+        circle, respectively (with the same input tensors filled with random values).
+  - "modelfile.tflite" ------> TFLite interpreter -> Execution result 1
+  - "modelfile.pass.circle" -> luci-interpreter ---> Execution result 2
+
+Step 3: Compare execution results 1 and 2. The test PASSES if the results are the same.
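+
+For one recipe, Step 1 boils down to a single circle2circle invocation; a rough sketch of the
+flow (paths here are illustrative, the real commands are generated by CMakeLists.txt and
+eval_driver.sh) looks like this:
+
+```sh
+# Step 1: produce the optimized circle with the requested pass
+circle2circle --fuse_batchnorm_with_tconv Net_TConv_BN_000.circle Net_TConv_BN_000.pass.circle
+
+# Steps 2-3: eval_driver.sh calls eval_result_verifier.py, which feeds the same random inputs
+# to the TFLite interpreter (source tflite) and to luci_eval_driver (.pass.circle) and then
+# compares the outputs with np.allclose.
+```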
--- /dev/null
+#!/bin/bash
+
+# This script verifies the tflite and circle execution result values
+#
+# HOW TO USE
+#
+# ./eval_driver.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <path/to/intp_dir>
+# <TEST 1> <TEST 2> ...
+# bin_dir : build directory of luci-pass-value-test (ex: build/compiler/luci-pass-value-test)
+# work_dir : artifacts directory where test materials exist
+# venv_dir : python virtual environment home directory
+# intp_dir : path to the luci_eval_driver binary from luci-eval-driver
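+#
+# Example (illustrative paths):
+#   ./eval_driver.sh build/compiler/luci-pass-value-test build/compiler/common-artifacts \
+#                    overlay/venv_2_3_0 build/compiler/luci-eval-driver/luci_eval_driver \
+#                    Net_TConv_BN_000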
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/eval_result_verifier.py"
+BINDIR="$1"; shift
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+INTERPRETER_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_TFLITE_FILE="${WORKDIR}/${TESTCASE}.tflite"
+ TESTCASE_CIRCLE_FILE="${BINDIR}/${TESTCASE}.pass.circle"
+ TEST_RESULT_FILE="${BINDIR}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}.log" <(
+ exec 2>&1
+ set -ex
+
+ source "${VIRTUALENV}/bin/activate"
+
+ "${VIRTUALENV}/bin/python" "${VERIFY_SCRIPT_PATH}" \
+ --driver "${INTERPRETER_DRIVER_PATH}" \
+ --tflite "${TESTCASE_TFLITE_FILE}" \
+ --circle "${TESTCASE_CIRCLE_FILE}"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
--- /dev/null
+#!/usr/bin/env python3
+import numpy as np
+import tensorflow as tf
+import subprocess
+import argparse
+import traceback
+
+#
+# This script was copied from luci-value-test; the input arguments are the tflite and circle paths
+#
+parser = argparse.ArgumentParser()
+parser.add_argument('--driver', type=str, required=True)
+parser.add_argument('--tflite', type=str, required=True)
+parser.add_argument('--circle', type=str, required=True)
+args = parser.parse_args()
+
+driver = args.driver
+tflite_model = args.tflite
+circle_model = args.circle
+
+# Build TFLite interpreter.
+interpreter = tf.lite.Interpreter(tflite_model)
+interpreter.allocate_tensors()
+
+# Generate random input data.
+num_inputs = len(interpreter.get_input_details())
+for i in range(num_inputs):
+ input_details = interpreter.get_input_details()[i]
+ if input_details["dtype"] == np.float32:
+ input_data = np.array(
+ np.random.random_sample(input_details["shape"]), input_details["dtype"])
+ elif input_details["dtype"] == np.uint8:
+ input_data = np.array(
+ np.random.randint(0, 256, size=input_details["shape"]),
+ input_details["dtype"])
+ elif input_details["dtype"] == np.bool_:
+ input_data = np.array(
+ np.random.choice(a=[True, False], size=input_details["shape"]),
+ input_details["dtype"])
+ else:
+ raise SystemExit("Unsupported input dtype")
+
+ interpreter.set_tensor(input_details["index"], input_data)
+ input_data.tofile(circle_model + ".input" + str(i))
+
+# Do inference
+interpreter.invoke()
+
+# Execute luci interpreter.
+subprocess.run(
+ [
+ driver, circle_model,
+ str(num_inputs), circle_model + ".input", circle_model + ".output"
+ ],
+ check=True)
+
+# Compare the results.
+for idx in range(len(interpreter.get_output_details())):
+ output_details = interpreter.get_output_details()[idx]
+ output_data = np.fromfile(circle_model + ".output" + str(idx),
+ output_details["dtype"])
+ shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
+ output_shape = [int(i) for i in shape_file.read().split(',')]
+ luci_output_data = np.reshape(output_data, output_shape)
+    try:
+        # Integer outputs must match exactly; float32 outputs are compared with a small tolerance.
+        if output_details["dtype"] in (np.uint8, np.int32, np.int64):
+            rtol, atol = 0, 0
+        elif output_details["dtype"] == np.float32:
+            rtol, atol = 1.e-5, 1.e-5
+        else:
+            raise SystemExit("Unsupported data type: ", output_details["dtype"])
+        intp_output_data = interpreter.get_tensor(output_details["index"])
+        if not np.allclose(luci_output_data, intp_output_data, rtol=rtol, atol=atol):
+            raise SystemExit("Execution result of " + tflite_model +
+                             " does not match with " + circle_model)
+    except:
+        print(traceback.format_exc())
+        quit(255)
+
+quit(0)
--- /dev/null
+require("common-artifacts")
+require("luci-interpreter")
+require("safemain")
+require("oops")
+require("loco")
+require("luci-value-test")
+require("luci-eval-driver")
--- /dev/null
+#
+# Format:
+# addeval(MODEL PASS)
+# MODEL: tflite model file name in build/compiler/common-artifacts folder.
+# PASS: Optimization Pass to test. Supports only one Pass for now.
+#
+
+# addeval(Net_Preactivation_BN_000 fuse_preactivation_batchnorm) : value diff exists
+# --> https://github.com/Samsung/ONE/issues/5782
+addeval(Net_Conv_Add_Mul_000 fuse_batchnorm_with_conv)
+addeval(Net_Conv_Add_Mul_001 fuse_batchnorm_with_conv)
+addeval(Net_Conv_Add_Mul_002 fuse_batchnorm_with_conv)
+addeval(Net_Conv_Min_Max_000 transform_min_max_to_relu6)
+addeval(Net_Conv_Relu6_000 fuse_activation_function)
+addeval(Net_DwConv_BN_000 fuse_batchnorm_with_dwconv)
+addeval(Net_DwConv_BN_001 fuse_batchnorm_with_dwconv)
+addeval(Net_Reshape_Neg_000 forward_reshape_to_unaryop)
+addeval(Net_Reshape_Reshape_000 remove_redundant_reshape)
+addeval(Net_Squeeze_Squeeze_000 substitute_squeeze_to_reshape)
+addeval(Net_TConv_Add_000 fuse_add_with_tconv)
+addeval(Net_TConv_Add_001 fuse_add_with_tconv)
+addeval(Net_TConv_Add_002 fuse_add_with_tconv)
+addeval(Net_TConv_BN_000 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_001 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_002 fuse_batchnorm_with_tconv)
+addeval(Net_InstanceNorm_001 fuse_instnorm)
+addeval(Net_InstanceNorm_002 fuse_instnorm)
+addeval(Net_InstanceNorm_003 fuse_instnorm)
+addeval(Net_StridedSlice_StridedSlice_000 remove_unnecessary_strided_slice)
--- /dev/null
+/test.local.lst
# Generate dependencies
add_custom_target(luci_eval_testfiles ALL DEPENDS ${TESTFILES})
-add_subdirectory(tester)
-
get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
add_test(NAME luci_value_test
"${CMAKE_CURRENT_BINARY_DIR}"
"${ARTIFACTS_BIN_PATH}"
"${NNCC_OVERLAY_DIR}/venv_2_3_0"
+ "$<TARGET_FILE:luci_eval_driver>"
${LUCI_VALUE_TESTS}
)
BINDIR="$1"; shift
WORKDIR="$1"; shift
VIRTUALENV="$1"; shift
-INTERPRETER_DRIVER_PATH="${BINDIR}/tester/luci_eval_tester"
+INTERPRETER_DRIVER_PATH="$1"; shift
TESTED=()
PASSED=()
# This script compares the execution result of luci-interpreter with that of TFLite interpreter
#
# Basic usage:
-# eval_verifier.py --driver build/compiler/luci-value-test/tester/luci_eval_tester
+# eval_verifier.py --driver build/compiler/luci-eval-driver/luci_eval_driver
# --model inception_v3
parser = argparse.ArgumentParser()
parser.add_argument('--driver', type=str, required=True)
require("safemain")
require("oops")
require("loco")
+require("luci-eval-driver")
#addeval(Square_000)
#addeval(SquaredDifference_000)
addeval(Squeeze_000)
+addeval(Squeeze_001)
addeval(StridedSlice_000)
addeval(StridedSlice_001)
addeval(StridedSlice_002)
+++ /dev/null
-
-set(SRCS_EVAL_TESTER
- src/EvalTester.cpp
- )
-
-add_executable(luci_eval_tester ${SRCS_EVAL_TESTER})
-target_link_libraries(luci_eval_tester PRIVATE oops)
-target_link_libraries(luci_eval_tester PRIVATE loco)
-target_link_libraries(luci_eval_tester PRIVATE luci_import)
-target_link_libraries(luci_eval_tester PRIVATE luci_export)
-target_link_libraries(luci_eval_tester PRIVATE luci_lang)
-target_link_libraries(luci_eval_tester PRIVATE luci_interpreter)
-target_link_libraries(luci_eval_tester PRIVATE safemain)
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <luci/Importer.h>
-#include <luci_interpreter/Interpreter.h>
-#include <luci/CircleExporter.h>
-#include <luci/CircleFileExpContract.h>
-
-#include <cstdlib>
-#include <fstream>
-#include <iostream>
-#include <vector>
-#include <map>
-#include <string>
-#include <random>
-
-namespace
-{
-
-void readDataFromFile(const std::string &filename, char *data, size_t data_size)
-{
- std::ifstream fs(filename, std::ifstream::binary);
- if (fs.fail())
- throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
- if (fs.read(data, data_size).fail())
- throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
-}
-
-void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
-{
- std::ofstream fs(filename, std::ofstream::binary);
- if (fs.fail())
- throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
- if (fs.write(data, data_size).fail())
- {
- throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
- }
-}
-
-std::unique_ptr<luci::Module> importModel(const std::string &filename)
-{
- std::ifstream fs(filename, std::ifstream::binary);
- if (fs.fail())
- {
- throw std::runtime_error("Cannot open model file \"" + filename + "\".\n");
- }
- std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
- std::istreambuf_iterator<char>());
- return luci::Importer().importModule(circle::GetModel(model_data.data()));
-}
-
-template <typename NodeT> size_t getTensorSize(const NodeT *node)
-{
- uint32_t tensor_size = loco::size(node->dtype());
- for (uint32_t i = 0; i < node->rank(); ++i)
- tensor_size *= node->dim(i).value();
- return tensor_size;
-}
-
-} // namespace
-
-/*
- * @brief EvalTester main
- *
- * Driver for testing luci-inerpreter
- *
- */
-int entry(int argc, char **argv)
-{
- if (argc != 5)
- {
- std::cerr
- << "Usage: " << argv[0]
- << " <path/to/circle/model> <num_inputs> <path/to/input/prefix> <path/to/output/file>\n";
- return EXIT_FAILURE;
- }
-
- const char *filename = argv[1];
- const int32_t num_inputs = atoi(argv[2]);
- const char *input_prefix = argv[3];
- const char *output_file = argv[4];
- const std::string intermediate_filename = std::string(filename) + ".inter.circle";
-
- // Load model from the file
- std::unique_ptr<luci::Module> initial_module = importModel(filename);
- if (initial_module == nullptr)
- {
- std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
- // Export to a Circle file
- luci::CircleExporter exporter;
-
- luci::CircleFileExpContract contract(initial_module.get(), intermediate_filename);
-
- if (!exporter.invoke(&contract))
- {
- std::cerr << "ERROR: Failed to export '" << intermediate_filename << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
- // Import model again
- std::unique_ptr<luci::Module> module = importModel(intermediate_filename);
- if (module == nullptr)
- {
- std::cerr << "ERROR: Failed to load '" << intermediate_filename << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
- // Create interpreter.
- luci_interpreter::Interpreter interpreter(module.get());
-
- // Set input.
- // Data for n'th input is read from ${input_prefix}n
- // (ex: Add.circle.input0, Add.circle.input1 ..)
- const auto input_nodes = loco::input_nodes(module->graph());
- assert(num_inputs == input_nodes.size());
- for (int32_t i = 0; i < num_inputs; i++)
- {
- const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
- std::vector<char> input_data(getTensorSize(input_node));
- readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data.data(),
- input_data.size());
- interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
- }
-
- // Do inference.
- interpreter.interpret();
-
- // Get output.
- const auto output_nodes = loco::output_nodes(module->graph());
- for (int i = 0; i < module->graph()->outputs()->size(); i++)
- {
- const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
- std::vector<char> output_data(getTensorSize(output_node));
- interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
-
- // Output data is written in ${output_file}
- // (ex: Add.circle.output0)
- // Output shape is written in ${output_file}.shape
- // (ex: Add.circle.output0.shape)
- writeDataToFile(std::string(output_file) + std::to_string(i), output_data.data(),
- output_data.size());
- // In case of Tensor output is Scalar value.
- // The output tensor with rank 0 is treated as a scalar with shape (1)
- if (output_node->rank() == 0)
- {
- writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", "1", 1);
- }
- else
- {
- auto shape_str = std::to_string(output_node->dim(0).value());
- for (int j = 1; j < output_node->rank(); j++)
- {
- shape_str += ",";
- shape_str += std::to_string(output_node->dim(j).value());
- }
- writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", shape_str.c_str(),
- shape_str.size());
- }
- }
- return EXIT_SUCCESS;
-}
add_subdirectory(env)
add_subdirectory(log)
add_subdirectory(lang)
+add_subdirectory(testhelper)
add_subdirectory(service)
add_subdirectory(pass)
+add_subdirectory(profile)
+add_subdirectory(partition)
add_subdirectory(logex)
add_subdirectory(import)
add_subdirectory(export)
Undefined,
MuteWarnings,
DisableValidation,
+ ProfilingDataGen,
};
static UserSettings *settings();
private:
bool _MuteWarnings{false};
bool _DisableValidation{false};
+ bool _ProfilingDataGen{false};
};
void UserSettingsImpl::set(const Key key, bool value)
case Key::DisableValidation:
_DisableValidation = value;
break;
+ case Key::ProfilingDataGen:
+ _ProfilingDataGen = value;
+ break;
default:
throw std::runtime_error("Invalid key in boolean set");
break;
return _MuteWarnings;
case Key::DisableValidation:
return _DisableValidation;
+ case Key::ProfilingDataGen:
+ return _ProfilingDataGen;
default:
throw std::runtime_error("Invalid key in boolean get");
break;
ASSERT_TRUE(settings->get(luci::UserSettings::Key::DisableValidation));
}
+TEST(UserSettings, ProfilingDataGen)
+{
+ auto settings = luci::UserSettings::settings();
+ ASSERT_NE(nullptr, settings);
+
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, false);
+ ASSERT_FALSE(settings->get(luci::UserSettings::Key::ProfilingDataGen));
+
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, true);
+ ASSERT_TRUE(settings->get(luci::UserSettings::Key::ProfilingDataGen));
+}
+
TEST(UserSettings, undefined_set_NEG)
{
auto settings = luci::UserSettings::settings();
target_link_libraries(luci_export PRIVATE luci_env)
target_link_libraries(luci_export PRIVATE luci_log)
target_link_libraries(luci_export PRIVATE luci_logex)
+target_link_libraries(luci_export PRIVATE luci_profile)
target_link_libraries(luci_export PRIVATE nncc_common)
target_link_libraries(luci_export PRIVATE locop)
target_link_libraries(luci_export PRIVATE oops)
{
public:
CircleFileExpContract(luci::Module *module, const std::string &filename)
- : _module(module), _filepath(filename)
+ : _module(module), _filepath(filename)
{
// NOTHING TO DO
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleExportMetadata.h"
+
+#include <luci/UserSettings.h>
+
+namespace
+{
+
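+// Appends 'value' to 'to' in little-endian byte order.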
+void write_u32(std::vector<uint8_t> &to, uint32_t value)
+{
+ to.emplace_back(0xFF & (value >> 0 * 8));
+ to.emplace_back(0xFF & (value >> 1 * 8));
+ to.emplace_back(0xFF & (value >> 2 * 8));
+ to.emplace_back(0xFF & (value >> 3 * 8));
+}
+
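+// Registers 'data' as a new buffer in the serialized model and returns a Metadata entry
+// named 'metadata_name' that points at that buffer.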
+flatbuffers::Offset<circle::Metadata> metadata_offset(flatbuffers::FlatBufferBuilder &builder,
+ luci::SerializedModelData &md,
+ const std::vector<uint8_t> &data,
+ const std::string &metadata_name)
+{
+ auto buffer_id = static_cast<uint32_t>(md._buffers.size());
+ md._buffers.push_back(circle::CreateBufferDirect(builder, &data));
+ return circle::CreateMetadataDirect(builder, metadata_name.c_str(), buffer_id);
+}
+
+} // namespace
+
+namespace luci
+{
+
+// 'source_table' is encoded to binary format.
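+//
+// Encoded layout (all integers are u32 written via write_u32):
+//   [number of entries]
+//   per entry: [id] [name length including the trailing '\0'] [name bytes] ['\0']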
+const std::vector<uint8_t> CircleExportMetadata::encoded_source_table(void)
+{
+ std::vector<uint8_t> data;
+
+ write_u32(data, _source_table.size());
+
+ for (auto &kv : _source_table)
+ {
+ const auto id = kv.first;
+ write_u32(data, id);
+
+ const auto origin_name = kv.second;
+ const auto length = origin_name.length();
+    write_u32(data, length + 1); // name + '\0'
+
+ for (uint32_t i = 0; i < length; ++i)
+ {
+ data.emplace_back(origin_name.at(i));
+ }
+ data.emplace_back('\0');
+ }
+
+ return data;
+}
+
+// 'op_table' is encoded to binary format.
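+//
+// Encoded layout (all integers are u32 written via write_u32):
+//   [number of entries]
+//   per entry: [id] [number of origins] [origin id ...]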
+const std::vector<uint8_t> CircleExportMetadata::encoded_op_table(void)
+{
+ std::vector<uint8_t> data;
+
+ write_u32(data, _op_table.size());
+
+ for (auto &kv : _op_table)
+ {
+ const auto id = kv.first;
+ write_u32(data, id);
+
+ const auto origins = kv.second;
+ const auto node_num = origins.size();
+ write_u32(data, node_num);
+
+ for (auto origin : origins)
+ {
+ write_u32(data, origin);
+ }
+ }
+
+ return data;
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+std::vector<flatbuffers::Offset<circle::Metadata>>
+createCircleMetadataVector(flatbuffers::FlatBufferBuilder &builder, luci::SerializedModelData &md)
+{
+ std::vector<flatbuffers::Offset<circle::Metadata>> metadata_vec;
+
+ auto settings = luci::UserSettings::settings();
+ if (settings->get(luci::UserSettings::Key::ProfilingDataGen))
+ {
+ metadata_vec.emplace_back(
+ metadata_offset(builder, md, md._metadata.encoded_source_table(), "ONE_source_table"));
+
+ metadata_vec.emplace_back(
+ metadata_offset(builder, md, md._metadata.encoded_op_table(), "ONE_op_table"));
+ }
+
+ return metadata_vec;
+}
+
+} // namespace luci
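The encoded tables are plain little-endian uint32 streams. As a worked illustration (not part of the patch), these are the bytes that encoded_source_table() would produce for a single entry {3, "conv"}, following write_u32() above:

#include <cstdint>
#include <vector>

// Illustration only: layout is [entry count] then, per entry,
// [source id][name length + 1][name bytes]['\0'], all u32 little-endian.
std::vector<uint8_t> expected_source_table_bytes(void)
{
  return {
    0x01, 0x00, 0x00, 0x00,  // entry count = 1
    0x03, 0x00, 0x00, 0x00,  // source id   = 3
    0x05, 0x00, 0x00, 0x00,  // length      = 4 ("conv") + 1 ('\0')
    'c',  'o',  'n',  'v',  '\0'
  };
}

encoded_op_table() follows the same pattern: [entry count] then, per entry, [node id][number of source ids][source ids...].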
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_EXPORT_METADATA_H__
+#define __LUCI_CIRCLE_EXPORT_METADATA_H__
+
+#include "SerializedData.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <mio/circle/schema_generated.h>
+
+namespace luci
+{
+
+/**
+ * @brief Create Metadata corresponding to model metadata
+ */
+std::vector<flatbuffers::Offset<circle::Metadata>>
+createCircleMetadataVector(flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md);
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_EXPORT_METADATA_H__
#include "CircleExporterImpl.h"
#include "Optimize.h"
+#include "CircleExportMetadata.h"
#include "CircleTensorExporter.h"
#include "CircleOperationExporter.h"
#include "CircleExporterUtils.h"
+#include <luci/IR/CircleNodes.h>
+
#include <oops/InternalExn.h>
#include <mio/circle/schema_generated.h>
#include <flatbuffers/flatbuffers.h>
#include <cassert>
#include <unordered_map>
#include <string>
-#include <stdexcept>
+#include <vector>
namespace
{
-luci::CircleInput *input_node(loco::Graph *g, const loco::GraphInputIndex &index)
-{
- for (uint32_t n = 0; n < g->nodes()->size(); ++n)
- {
- if (auto input = dynamic_cast<luci::CircleInput *>(g->nodes()->at(n)))
- {
- if (input->indexed() && input->index() == index)
- {
- return input;
- }
- }
- }
- return nullptr;
-}
-
-luci::CircleOutput *output_node(loco::Graph *g, const loco::GraphOutputIndex &index)
-{
- for (uint32_t n = 0; n < g->nodes()->size(); ++n)
- {
- if (auto output = dynamic_cast<luci::CircleOutput *>(g->nodes()->at(n)))
- {
- if (output->indexed() && output->index() == index)
- {
- return output;
- }
- }
- }
- return nullptr;
-}
-
void registerGraphInputTensors(loco::Graph *graph, luci::SubGraphContext &ctx)
{
for (uint32_t n = 0; n < graph->inputs()->size(); ++n)
{
- auto node = input_node(graph, n);
+ auto node = luci::input_node(graph, n);
assert(node != nullptr);
ctx._inputs.push_back(luci::get_tensor_index(node));
}
{
for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
{
- auto push = output_node(graph, n);
+ auto push = luci::output_node(graph, n);
assert(push != nullptr);
auto node = push->from();
assert(node != nullptr);
else
{
operator_codes_vec[idx] =
- CreateOperatorCode(builder, it.first.opcode, builder.CreateString(it.first.custom_code));
+ CreateOperatorCode(builder, it.first.opcode, builder.CreateString(it.first.custom_code));
}
}
std::string description_str = "nnpackage";
auto description = _builder.CreateString(description_str);
+ // Metadata
+ auto metadata_vec = createCircleMetadataVector(_builder, md);
+ auto metadata = _builder.CreateVector(std::vector<Offset<Metadata>>(metadata_vec));
+
// create array of buffers
auto buffers = _builder.CreateVector(md._buffers);
- // empty metadata
- std::vector<int> metadata_buffer_vec;
- auto metadata_buffer = _builder.CreateVector(metadata_buffer_vec);
-
// Model
auto model_offset = CreateModel(_builder, version, operator_codes, subgraphs, description,
- buffers, metadata_buffer);
+ buffers, 0 /* metadata_buffer */, metadata);
FinishModelBuffer(_builder, model_offset);
}
std::string description_str = "nnpackage";
auto description = _builder.CreateString(description_str);
+ // Metadata
+ auto metadata_vec = createCircleMetadataVector(_builder, md);
+ auto metadata = _builder.CreateVector(std::vector<Offset<Metadata>>(metadata_vec));
+
// create array of buffers
auto buffers = _builder.CreateVector(md._buffers);
- // empty metadata
- std::vector<int> metadata_buffer_vec;
- auto metadata_buffer = _builder.CreateVector(metadata_buffer_vec);
-
// This version is taken from comment in fbs
constexpr uint32_t version = 0;
// Model
auto model_offset = CreateModel(_builder, version, operator_codes, subgraphs, description,
- buffers, metadata_buffer);
+ buffers, 0 /* metadata_buffer */, metadata);
FinishModelBuffer(_builder, model_offset);
}
#include "SerializedData.h"
-#include "SerializedData.h"
-
#include <mio/circle/schema_generated.h>
#include <loco.h>
//
// NOTE input and output 'feature' map are shape of NHWC
bool same_padding_criterion_1 =
- (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
- (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
+ (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
+ (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
// For same padding, rear padding is same or bigger than front padding by at most 1
bool same_padding_criterion_2 =
- (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
- (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
+ (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
+ (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
if (same_padding_criterion_1 && same_padding_criterion_2)
return circle::Padding_SAME;
#include <luci/IR/CircleNode.h>
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/UserSettings.h>
#include <luci/Log.h>
void export_pool_2d(ExportContext &ctx, CirclePool2D *node, circle::BuiltinOperator builtin_op)
{
LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
- builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
- builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
+ builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
+ builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
"Should be L2Pool, MaxPool or AvgPool");
LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");
circle::BuiltinOptions bot, flatbuffers::Offset<void> options_offset)
{
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
+ ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
std::vector<int32_t> inputs_vec;
std::vector<int32_t> outputs_vec{get_tensor_index(node)};
for (uint32_t i = 0; i < node->arity(); ++i)
void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop)
{
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
+ ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
std::vector<int32_t> inputs_vec;
std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
for (uint32_t i = 0; i < node->arity(); ++i)
void export_node(ExportContext &ctx, luci::CircleConcatenation *node)
{
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
std::vector<int32_t> inputs_vec;
std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
void export_node(ExportContext &ctx, luci::CircleCustom *node)
{
auto custom_outputs = loco::succs(node);
+ assert(custom_outputs.size() == node->numOutputs());
uint32_t op_idx = ctx.md.registerCustomOpcode(node->custom_code());
std::vector<int32_t> inputs_vec;
uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4,
node->op_version());
std::vector<int32_t> inputs_vec{
- get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
- get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
- get_tensor_index(node->score_threshold()),
+ get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
+ get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
+ get_tensor_index(node->score_threshold()),
};
std::vector<int32_t> outputs_vec;
auto outputs = ctx.builder.CreateVector(outputs_vec);
auto options = CreateNonMaxSuppressionV4Options(ctx.builder);
auto op_offset =
- CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
+ CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
ctx.gd._operators.push_back(op_offset);
}
uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V5,
node->op_version());
std::vector<int32_t> inputs_vec{
- get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
- get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
- get_tensor_index(node->score_threshold()), get_tensor_index(node->soft_nms_sigma()),
+ get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
+ get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
+ get_tensor_index(node->score_threshold()), get_tensor_index(node->soft_nms_sigma()),
};
std::vector<int32_t> outputs_vec;
auto outputs = ctx.builder.CreateVector(outputs_vec);
auto options = CreateNonMaxSuppressionV5Options(ctx.builder);
auto op_offset =
- CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_NonMaxSuppressionV5Options, options.Union());
+ CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_NonMaxSuppressionV5Options, options.Union());
ctx.gd._operators.push_back(op_offset);
}
void export_node(ExportContext &ctx, luci::CircleReverseV2 *node)
{
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())};
std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
auto inputs = ctx.builder.CreateVector(inputs_vec);
assert(int32_t(split_outs.size()) == node->num_split());
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
get_tensor_index(node->size_splits()),
get_tensor_index(node->split_dim())};
assert(outs_count == 2);
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())};
std::vector<int32_t> outputs_vec;
auto unique_outs = loco::succs(node);
assert(int32_t(unique_outs.size()) == 2);
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
std::vector<int32_t> outputs_vec;
}
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
std::vector<int32_t> outputs_vec;
void visit(luci::CircleAveragePool2D *) final;
void visit(luci::CircleBatchMatMul *) final;
void visit(luci::CircleBatchToSpaceND *) final;
+ void visit(luci::CircleBidirectionalSequenceLSTM *) final;
void visit(luci::CircleCast *) final;
void visit(luci::CircleCeil *) final;
void visit(luci::CircleConcatenation *) final;
void visit(luci::CircleEqual *) final;
void visit(luci::CircleExp *) final;
void visit(luci::CircleExpandDims *) final;
+ void visit(luci::CircleFakeQuant *) final;
void visit(luci::CircleFill *) final;
void visit(luci::CircleFloor *) final;
void visit(luci::CircleFloorDiv *) final;
void visit(luci::CircleOutputDummy *) final {}
void visit(luci::CircleOutputExclude *) final {}
// Virtual for multiple-outputs
+ void visit(luci::CircleBidirectionalSequenceLSTMOut *) final {}
void visit(luci::CircleCustomOut *) final {}
void visit(luci::CircleIfOut *) final {}
void visit(luci::CircleNonMaxSuppressionV4Out *) final {}
void OperationExporter::visit(luci::CircleAdd *node)
{
export_simple(
- node, circle::BuiltinOperator_ADD, circle::BuiltinOptions_AddOptions,
- CreateAddOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ node, circle::BuiltinOperator_ADD, circle::BuiltinOptions_AddOptions,
+ CreateAddOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleAddN *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleArgMax *node)
{
export_simple(
- node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
- CreateArgMaxOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
+ node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
+ CreateArgMaxOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
}
void OperationExporter::visit(luci::CircleArgMin *node)
{
export_simple(
- node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
- CreateArgMinOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
+ node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
+ CreateArgMinOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
}
void OperationExporter::visit(luci::CircleAveragePool2D *node)
CreateBatchMatMulOptions(_ctx.builder, node->adj_x(), node->adj_y()).Union());
}
+void OperationExporter::visit(luci::CircleBidirectionalSequenceLSTM *node)
+{
+ auto bidi_lstm_outs = loco::succs(node);
+ assert((bidi_lstm_outs.size() == 1) || (bidi_lstm_outs.size() == 2));
+ uint32_t op_idx = _ctx.md.registerBuiltinOpcode(
+ circle::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, node->op_version());
+
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+ std::vector<int32_t> outputs_vec;
+
+ for (int32_t index = 0; index < 2; index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : bidi_lstm_outs)
+ {
+ auto bidi_lstm_out = loco::must_cast<luci::CircleBidirectionalSequenceLSTMOut *>(out);
+ if (bidi_lstm_out->index() == index)
+ {
+ outputs_vec.push_back(get_tensor_index(bidi_lstm_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid BidirectionalSequenceLSTM output");
+ }
+ }
+
+ auto inputs = _ctx.builder.CreateVector(inputs_vec);
+ auto outputs = _ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateBidirectionalSequenceLSTMOptions(
+ _ctx.builder, to_circle_actfunc(node->fusedActivationFunction()), node->cell_clip(),
+ node->proj_clip(), node->merge_outputs(), node->time_major(),
+ node->asymmetric_quantize_inputs());
+ auto op_offset =
+ CreateOperator(_ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_BidirectionalSequenceLSTMOptions, options.Union());
+ _ctx.gd._operators.push_back(op_offset);
+}
+
void OperationExporter::visit(luci::CircleCast *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleCeil *node)
node->stride()->w(), node->stride()->h(),
to_circle_actfunc(node->fusedActivationFunction()),
node->dilation()->w(), node->dilation()->h())
- .Union());
+ .Union());
}
void OperationExporter::visit(luci::CircleCos *node)
void OperationExporter::visit(luci::CircleDepthwiseConv2D *node)
{
- export_simple(node, circle::BuiltinOperator_DEPTHWISE_CONV_2D,
- circle::BuiltinOptions_DepthwiseConv2DOptions,
- CreateDepthwiseConv2DOptions(_ctx.builder, getOpPadding(node->padding()),
- node->stride()->w(), node->stride()->h(),
- node->depthMultiplier(),
- to_circle_actfunc(node->fusedActivationFunction()),
- node->dilation()->w(), node->dilation()->h())
- .Union());
+ export_simple(
+ node, circle::BuiltinOperator_DEPTHWISE_CONV_2D, circle::BuiltinOptions_DepthwiseConv2DOptions,
+ CreateDepthwiseConv2DOptions(_ctx.builder, getOpPadding(node->padding()), node->stride()->w(),
+ node->stride()->h(), node->depthMultiplier(),
+ to_circle_actfunc(node->fusedActivationFunction()),
+ node->dilation()->w(), node->dilation()->h())
+ .Union());
}
void OperationExporter::visit(luci::CircleDequantize *node)
void OperationExporter::visit(luci::CircleDiv *node)
{
export_simple(
- node, circle::BuiltinOperator_DIV, circle::BuiltinOptions_DivOptions,
- CreateDivOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ node, circle::BuiltinOperator_DIV, circle::BuiltinOptions_DivOptions,
+ CreateDivOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleElu *node)
CreateExpandDimsOptions(_ctx.builder).Union());
}
+void OperationExporter::visit(luci::CircleFakeQuant *node)
+{
+ export_simple(node, circle::BuiltinOperator_FAKE_QUANT, circle::BuiltinOptions_FakeQuantOptions,
+ CreateFakeQuantOptions(_ctx.builder, node->min(), node->max(), node->num_bits(),
+ node->narrow_range())
+ .Union());
+}
+
void OperationExporter::visit(luci::CircleFill *node)
{
export_simple(node, circle::BuiltinOperator_FILL, circle::BuiltinOptions_FillOptions,
void OperationExporter::visit(luci::CircleFullyConnected *node)
{
export_simple(
- node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions,
- CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()),
- to_circle_weightsformat(node->weights_format()))
- .Union());
+ node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions,
+ CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()),
+ to_circle_weightsformat(node->weights_format()))
+ .Union());
}
void OperationExporter::visit(luci::CircleGather *node)
void OperationExporter::visit(luci::CircleL2Normalize *node)
{
export_simple(
- node, circle::BuiltinOperator_L2_NORMALIZATION, circle::BuiltinOptions_L2NormOptions,
- CreateL2NormOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()))
- .Union());
+ node, circle::BuiltinOperator_L2_NORMALIZATION, circle::BuiltinOptions_L2NormOptions,
+ CreateL2NormOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleL2Pool2D *node)
circle::BuiltinOptions_LocalResponseNormalizationOptions,
CreateLocalResponseNormalizationOptions(_ctx.builder, node->radius(), node->bias(),
node->alpha(), node->beta())
- .Union());
+ .Union());
}
void OperationExporter::visit(luci::CircleLog *node)
void OperationExporter::visit(luci::CircleMirrorPad *node)
{
export_simple(
- node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
- CreateMirrorPadOptions(_ctx.builder, to_circle_mirrorpadmode(node->mode())).Union());
+ node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
+ CreateMirrorPadOptions(_ctx.builder, to_circle_mirrorpadmode(node->mode())).Union());
}
void OperationExporter::visit(luci::CircleMul *node)
{
export_simple(
- node, circle::BuiltinOperator_MUL, circle::BuiltinOptions_MulOptions,
- CreateMulOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ node, circle::BuiltinOperator_MUL, circle::BuiltinOptions_MulOptions,
+ CreateMulOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleNeg *node)
void OperationExporter::visit(luci::CircleReshape *node)
{
auto new_shape = _ctx.builder.CreateVector<int32_t>(
- node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
+ node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
export_simple(node, circle::BuiltinOperator_RESHAPE, circle::BuiltinOptions_ReshapeOptions,
CreateReshapeOptions(_ctx.builder, new_shape).Union());
void OperationExporter::visit(luci::CircleResizeBilinear *node)
{
export_simple(
- node, circle::BuiltinOperator_RESIZE_BILINEAR, circle::BuiltinOptions_ResizeBilinearOptions,
- CreateResizeBilinearOptions(_ctx.builder, node->align_corners(), node->half_pixel_centers())
- .Union());
+ node, circle::BuiltinOperator_RESIZE_BILINEAR, circle::BuiltinOptions_ResizeBilinearOptions,
+ CreateResizeBilinearOptions(_ctx.builder, node->align_corners(), node->half_pixel_centers())
+ .Union());
}
void OperationExporter::visit(luci::CircleResizeNearestNeighbor *node)
void OperationExporter::visit(luci::CircleReverseSequence *node)
{
export_simple(
- node, circle::BuiltinOperator_REVERSE_SEQUENCE, circle::BuiltinOptions_ReverseSequenceOptions,
- CreateReverseSequenceOptions(_ctx.builder, node->seq_axis(), node->batch_axis()).Union());
+ node, circle::BuiltinOperator_REVERSE_SEQUENCE, circle::BuiltinOptions_ReverseSequenceOptions,
+ CreateReverseSequenceOptions(_ctx.builder, node->seq_axis(), node->batch_axis()).Union());
}
void OperationExporter::visit(luci::CircleReverseV2 *node) { export_node(_ctx, node); }
CreateStridedSliceOptions(_ctx.builder, node->begin_mask(), node->end_mask(),
node->ellipsis_mask(), node->new_axis_mask(),
node->shrink_axis_mask())
- .Union());
+ .Union());
}
void OperationExporter::visit(luci::CircleSub *node)
{
export_simple(
- node, circle::BuiltinOperator_SUB, circle::BuiltinOptions_SubOptions,
- CreateSubOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ node, circle::BuiltinOperator_SUB, circle::BuiltinOptions_SubOptions,
+ CreateSubOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleSum *node)
circle::BuiltinOptions_TransposeConvOptions,
CreateTransposeConvOptions(_ctx.builder, getOpPadding(node->padding()),
node->stride()->w(), node->stride()->h())
- .Union());
+ .Union());
}
void OperationExporter::visit(luci::CircleUnidirectionalSequenceLSTM *node)
export_simple(node, circle::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
circle::BuiltinOptions_UnidirectionalSequenceLSTMOptions,
CreateUnidirectionalSequenceLSTMOptions(
- _ctx.builder, to_circle_actfunc(node->fusedActivationFunction()),
- node->cell_clip(), node->proj_clip(), node->time_major(),
- node->asymmetric_quantize_inputs())
- .Union());
+ _ctx.builder, to_circle_actfunc(node->fusedActivationFunction()),
+ node->cell_clip(), node->proj_clip(), node->time_major(),
+ node->asymmetric_quantize_inputs())
+ .Union());
}
void OperationExporter::visit(luci::CircleUnique *node) { export_node(_ctx, node); }
circle::BuiltinOptions_BCQFullyConnectedOptions,
CreateBCQFullyConnectedOptions(_ctx.builder, node->weights_hidden_size(),
to_circle_actfunc(node->fusedActivationFunction()))
- .Union());
+ .Union());
}
void OperationExporter::visit(luci::CircleBCQGather *node)
{
export_simple(
- node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
- CreateBCQGatherOptions(_ctx.builder, node->input_hidden_size(), node->axis()).Union());
+ node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
+ CreateBCQGatherOptions(_ctx.builder, node->input_hidden_size(), node->axis()).Union());
}
void OperationExporter::visit(luci::CircleInstanceNorm *node)
circle::BuiltinOptions_InstanceNormOptions,
CreateInstanceNormOptions(_ctx.builder, node->epsilon(),
to_circle_actfunc(node->fusedActivationFunction()))
- .Union());
+ .Union());
}
void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md,
{
ExportContext ctx{builder, md, gd};
OperationExporter exporter{ctx};
+
+ const auto ops_size = gd._operators.size();
+
circle_node->accept(&exporter);
+ if (has_origin(circle_node) && ops_size != gd._operators.size())
+ {
+ const auto node_id = gd._operators.size() - 1;
+ for (auto source : get_origin(circle_node)->sources())
+ {
+ md._metadata.add_source_table(source->id(), source->name());
+ md._metadata.add_op_table(node_id, source->id());
+ }
+ }
}
else
{
*/
#include "CircleTensorExporter.h"
-#include "TypeBridge.h"
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/IR/CircleShapeSignature.h>
#include <luci/Service/CircleTypeInference.h>
#include <luci/Service/CircleShapeInference.h>
#include <luci/Log.h>
using namespace luci;
-class CircleTensoInfo
+class CircleTensorInfo
{
public:
- CircleTensoInfo() = default;
+ CircleTensorInfo() = default;
public:
void name(const std::string &name) { _name = name; }
const ShapeDescription &shape(void) const { return _shape; }
void shape(const ShapeDescription &shape) { _shape = shape; }
- const ShapeSignature &shape_signature(void) const { return _shape_signature; }
- void shape_signature(const ShapeSignature &ss) { _shape_signature = ss; }
-
luci::ShapeStatus shape_status(void) const { return _shape_status; }
void shape_status(luci::ShapeStatus ss) { _shape_status = ss; }
circle::TensorType _dtype{circle::TensorType_FLOAT32};
ShapeDescription _shape{};
- ShapeSignature _shape_signature;
luci::ShapeStatus _shape_status{luci::ShapeStatus::UNDEFINED};
luci::CircleConst *_content = nullptr;
luci::SparsityParam *_sparsityparam = nullptr;
};
-using CircleTensorContext = std::vector<CircleTensoInfo>;
+class CircleTensorContext
+{
+public:
+ CircleTensorContext() = default;
+
+public:
+ void emplace_back(CircleTensorInfo &ti)
+ {
+ assert(_names.find(ti.name()) == _names.end());
+ _tis.emplace_back(ti);
+ _names.insert(ti.name());
+ }
+ size_t size(void) const { return _tis.size(); }
+ std::vector<CircleTensorInfo>::iterator begin(void) { return _tis.begin(); }
+ std::vector<CircleTensorInfo>::iterator end(void) { return _tis.end(); }
+
+public:
+ bool exist(const std::string &name) const { return _names.find(name) != _names.end(); }
+
+private:
+ std::vector<CircleTensorInfo> _tis;
+ std::set<std::string> _names;
+};
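A short sketch of how the new CircleTensorContext is meant to be used; it relies on the file-local CircleTensorInfo/CircleTensorContext types above, and the helper name register_tensor is hypothetical (the real call site is the tensor-export routine shown below):

#include <string>

// Sketch: emplace_back() asserts that each tensor name is registered once,
// so duplicates must be renamed before insertion (mirroring the logic below).
void register_tensor(CircleTensorContext &ctx, const std::string &name)
{
  std::string tensor_name = name;
  if (ctx.exist(tensor_name))
    tensor_name += "_" + std::to_string(ctx.size()); // make the name unique

  CircleTensorInfo info;
  info.name(tensor_name);
  ctx.emplace_back(info);
}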
struct NoOpDetector final : public luci::CircleNodeMutableVisitor<bool>
{
auto tensor_index = static_cast<CircleTensorIndex>(ctx.size());
// TODO Use Graph-level metadata for Input & Output
- // auto tensor_name = "t_" + std::to_string(tensor_index);
std::string tensor_name = node->name();
- if (tensor_name.empty())
- tensor_name = "t_" + std::to_string(tensor_index);
+ // NOTE tensor_name may be empty; this assertion will alert when that happens.
+ //      Currently we require every tensor to have a name.
+ // TODO If this breaks, fix the cause or permit an empty tensor_name.
+ assert(!tensor_name.empty());
+ if (ctx.exist(tensor_name))
+ {
+ // NOTE make the tensor name unique by appending the tensor index.
+ tensor_name = tensor_name + "_" + std::to_string(tensor_index);
+ assert(!ctx.exist(tensor_name));
+ }
INFO(l) << "[luci] Tensor for " << tensor_name << ": " << tensor_index << std::endl;
- CircleTensoInfo tensor_info;
+ CircleTensorInfo tensor_info;
tensor_info.name(tensor_name);
tensor_info.dtype(to_circle_tensortype(node->dtype()));
- tensor_info.shape_signature(node->shape_signature());
if (node->shape_status() == ShapeStatus::VALID)
tensor_info.shape(to_shape_description(node));
tensor_info.shape_status(node->shape_status());
}
public:
+ bool visit(luci::CircleBidirectionalSequenceLSTMOut *) final { return true; }
+ bool visit(luci::CircleCustomOut *) final { return true; }
bool visit(luci::CircleIfOut *) final { return true; }
+ bool visit(luci::CircleNonMaxSuppressionV4Out *) final { return true; }
+ bool visit(luci::CircleNonMaxSuppressionV5Out *) final { return true; }
bool visit(luci::CircleSplitOut *) final { return true; }
bool visit(luci::CircleSplitVOut *) final { return true; }
bool visit(luci::CircleTopKV2Out *) final { return true; }
bool visit(luci::CircleUnpackOut *) final { return true; }
+ bool visit(luci::CircleUniqueOut *) final { return true; }
bool visit(luci::CircleWhileOut *) final { return true; }
+ bool visit(luci::CircleBidirectionalSequenceLSTM *node) final
+ {
+ if (node->merge_outputs())
+ {
+ store_outputs(node, 1);
+ }
+ else
+ {
+ store_outputs(node, 2);
+ }
+ return true;
+ }
+
+ bool visit(luci::CircleCustom *node) final
+ {
+ store_outputs(node, node->numOutputs());
+ return true;
+ }
+
bool visit(luci::CircleIf *node) final
{
store_outputs(node, node->output_count());
return true;
}
+ bool visit(luci::CircleNonMaxSuppressionV4 *node) final
+ {
+ store_outputs(node, 2);
+ return true;
+ }
+
+ bool visit(luci::CircleNonMaxSuppressionV5 *node) final
+ {
+ store_outputs(node, 3);
+ return true;
+ }
+
bool visit(luci::CircleSplit *node) final
{
store_outputs(node, uint32_t(node->num_split()));
return true;
}
+ bool visit(luci::CircleUnique *node) final
+ {
+ store_outputs(node, 2);
+ return true;
+ }
+
bool visit(luci::CircleWhile *node) final
{
store_outputs(node, node->output_count());
const ShapeDescription &shape)
{
assert(shape._rank_known && "unknown number of dimensions is not supported");
- return builder.CreateVector(shape._dims);
+
+ std::vector<int32_t> encoded_shape;
+ encoded_shape.resize(shape._dims.size());
+ for (uint32_t i = 0; i < shape._dims.size(); ++i)
+ encoded_shape.at(i) = shape._dims.at(i) == -1 ? 1 : shape._dims.at(i);
+
+ return builder.CreateVector(encoded_shape);
}
flatbuffers::Offset<Vector<int32_t>> encodeShapeSignature(FlatBufferBuilder &builder,
- const ShapeSignature &shape_signature)
+ const ShapeDescription &shape)
{
- if (shape_signature.rank() == 0)
- return 0;
+ assert(shape._rank_known && "unknown number of dimensions is not supported");
+
+ // shape_signature is set if and only if at least one of the dimensions is unknown.
+ for (uint32_t i = 0; i < shape._dims.size(); ++i)
+ if (shape._dims.at(i) == -1)
+ return builder.CreateVector(shape._dims);
- return builder.CreateVector(shape_signature.as_vector());
+ return flatbuffers::Offset<Vector<int32_t>>();
}
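The division of labor between encodeShape and encodeShapeSignature is: the shape field never carries -1 (unknown dimensions are exported as 1), and shape_signature is emitted only when at least one dimension is unknown. A flatbuffers-free sketch of that rule, for illustration only:

#include <cstdint>
#include <vector>

struct EncodedDims
{
  std::vector<int32_t> shape;           // -1 replaced by 1
  std::vector<int32_t> shape_signature; // empty when all dims are known
};

EncodedDims encode_dims(const std::vector<int32_t> &dims)
{
  EncodedDims out;
  bool has_unknown = false;
  for (auto d : dims)
  {
    out.shape.push_back(d == -1 ? 1 : d);
    has_unknown = has_unknown || (d == -1);
  }
  if (has_unknown)
    out.shape_signature = dims; // keep original dims, including -1
  return out;
}

// e.g. encode_dims({1, -1, 224, 3}).shape           == {1, 1, 224, 3}
//      encode_dims({1, -1, 224, 3}).shape_signature == {1, -1, 224, 3}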
flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder)
// array_segments
auto circle_array_segments = to_circle_sparse_index_vector(builder, it.array_segments());
auto circle_array_segments_type =
- to_circle_sparse_index_vector_type(it.array_segments().type());
+ to_circle_sparse_index_vector_type(it.array_segments().type());
// array_indices
auto circle_array_indices = to_circle_sparse_index_vector(builder, it.array_indices());
auto circle_array_indices_type = to_circle_sparse_index_vector_type(it.array_indices().type());
auto dim_metadata = circle::CreateDimensionMetadata(
- builder, to_circle_dimensiontype(it.format()), it.dense_size(), circle_array_segments_type,
- circle_array_segments, circle_array_indices_type, circle_array_indices);
+ builder, to_circle_dimensiontype(it.format()), it.dense_size(), circle_array_segments_type,
+ circle_array_segments, circle_array_indices_type, circle_array_indices);
dim_metadata_vec.emplace_back(dim_metadata);
}
&sparsityparam->block_map, &dim_metadata_vec);
}
+template <loco::DataType DT> bool has_same_elements(luci::CircleConst *lhs, luci::CircleConst *rhs)
+{
+ assert(lhs->dtype() == DT);
+ assert(rhs->dtype() == DT);
+ assert(lhs->size<DT>() == rhs->size<DT>());
+
+ for (uint32_t i = 0; i < lhs->size<DT>(); ++i)
+ if (lhs->at<DT>(i) != rhs->at<DT>(i))
+ return false;
+ return true;
+}
+
bool has_same_values(luci::CircleConst *lhs, luci::CircleConst *rhs)
{
if (lhs->dtype() != rhs->dtype())
switch (lhs->dtype())
{
case loco::DataType::FLOAT32:
- for (uint32_t i = 0; i < lhs->size<loco::DataType::FLOAT32>(); ++i)
- if (lhs->at<loco::DataType::FLOAT32>(i) != rhs->at<loco::DataType::FLOAT32>(i))
- return false;
- break;
+ return has_same_elements<loco::DataType::FLOAT32>(lhs, rhs);
+
+ case loco::DataType::S8:
+ return has_same_elements<loco::DataType::S8>(lhs, rhs);
+
+ case loco::DataType::S16:
+ return has_same_elements<loco::DataType::S16>(lhs, rhs);
case loco::DataType::S32:
- for (uint32_t i = 0; i < lhs->size<loco::DataType::S32>(); ++i)
- if (lhs->at<loco::DataType::S32>(i) != rhs->at<loco::DataType::S32>(i))
- return false;
- break;
+ return has_same_elements<loco::DataType::S32>(lhs, rhs);
case loco::DataType::S64:
- for (uint32_t i = 0; i < lhs->size<loco::DataType::S64>(); ++i)
- if (lhs->at<loco::DataType::S64>(i) != rhs->at<loco::DataType::S64>(i))
- return false;
- break;
+ return has_same_elements<loco::DataType::S64>(lhs, rhs);
+
+ case loco::DataType::U8:
+ return has_same_elements<loco::DataType::U8>(lhs, rhs);
case loco::DataType::BOOL:
- for (uint32_t i = 0; i < lhs->size<loco::DataType::BOOL>(); ++i)
- if (lhs->at<loco::DataType::BOOL>(i) != rhs->at<loco::DataType::BOOL>(i))
- return false;
- break;
+ return has_same_elements<loco::DataType::BOOL>(lhs, rhs);
default:
- return false;
+ break;
}
- return true;
+ return false;
}
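has_same_elements centralizes the per-type comparison that has_same_values previously inlined, and it now also covers S8, S16 and U8. The exporter presumably uses has_same_values to deduplicate constant buffers; a minimal sketch of that idea (the cache parameter stands in for SerializedModelData::_cached_buffer_id and the helper name is hypothetical):

#include <cstdint>
#include <map>

// Sketch: reuse an existing buffer id when an identical constant was already
// exported; otherwise register the node with the next available id.
uint32_t deduplicated_buffer_id(std::map<luci::CircleConst *, uint32_t> &cache,
                                luci::CircleConst *node, uint32_t next_id)
{
  for (auto &kv : cache)
  {
    if (has_same_values(kv.first, node))
      return kv.second; // identical content already has a buffer
  }
  cache[node] = next_id; // first occurrence of this content
  return next_id;
}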
uint32_t get_buffer_id(FlatBufferBuilder &builder, SerializedModelData &md, luci::CircleConst *node)
}
}
-void exportOpDefinedTensor(const CircleTensoInfo &info, FlatBufferBuilder &builder,
+void exportOpDefinedTensor(const CircleTensorInfo &info, FlatBufferBuilder &builder,
SerializedModelData &md, SerializedGraphData &gd)
{
// Create and register output tensor shape
flatbuffers::Offset<Vector<int32_t>> shape_offset;
+ flatbuffers::Offset<Vector<int32_t>> shape_signature_offset;
if (info.shape_status() == ShapeStatus::VALID)
+ {
shape_offset = encodeShape(builder, info.shape());
+ shape_signature_offset = encodeShapeSignature(builder, info.shape());
+ }
auto quantparam = encodeQuantizationParameters(builder, info.quantparam());
auto sparsityparam = encodeSparsityParameters(builder, info.sparsityparam());
- auto shape_signature_offset = encodeShapeSignature(builder, info.shape_signature());
-
auto buffer_id = get_buffer_id(builder, md, info.content());
auto name_offset = builder.CreateString(info.name());
auto tensor_offset =
- CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset, quantparam,
- /*is_variable*/ false, sparsityparam, shape_signature_offset);
+ CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset, quantparam,
+ /*is_variable*/ false, sparsityparam, shape_signature_offset);
gd._tensors.push_back(tensor_offset);
}
#include "Optimize.h"
#include "ProgressReporter.h"
-#include <luci/Pass/ShapeInferencePass.h>
-#include <luci/Pass/ShapeSignatureInferencePass.h>
-#include <luci/Pass/TypeInferencePass.h>
+#include <luci/Pass/CircleShapeInferencePass.h>
+#include <luci/Pass/CircleTypeInferencePass.h>
#include <logo/Phase.h>
logo::Phase phase;
{
// prepare type and shape before optimization
- phase.emplace_back(std::make_unique<TypeInferencePass>());
- phase.emplace_back(std::make_unique<ShapeInferencePass>());
- phase.emplace_back(std::make_unique<ShapeSignatureInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
// TODO add more optimization passes (with a knob)
}
{
public:
ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
- : _graph{graph}, _strategy{strategy}
+ : _graph{graph}, _strategy{strategy}
{
// DO NOTHING
}
}
};
+class CircleExportMetadata
+{
+public:
+ void add_source_table(uint32_t source_id, std::string origin_name)
+ {
+ // A model with multiple subgraphs may have different origin_name values
+ // even if the source_id is the same. However, as profiling does not
+ // consider multiple subgraphs for now, we ignore those cases here;
+ // correct support will be added in the future.
+ _source_table.emplace(source_id, origin_name);
+ }
+
+ void add_op_table(uint32_t node_id, uint32_t source_id)
+ {
+ // A model with multiple subgraphs may have duplicated node ids.
+ // For now, as profiling does not consider multiple subgraphs,
+ // we ignore those cases; support will be added in the future.
+ if (_op_table.find(node_id) == _op_table.end())
+ _op_table.emplace(node_id, std::set<uint32_t>());
+ _op_table.at(node_id).emplace(source_id);
+ }
+
+public:
+ const std::vector<uint8_t> encoded_source_table(void);
+ const std::vector<uint8_t> encoded_op_table(void);
+
+private:
+ std::map<uint32_t, std::string> _source_table;
+ std::map<uint32_t, std::set<uint32_t>> _op_table;
+};
+
} // namespace luci
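Taken together with the exporter hook added above (has_origin/get_origin after accept()), the intended flow is: each time an operator is emitted, its (node id, source id) pairs and source names are recorded into CircleExportMetadata, and createCircleMetadataVector() later serializes them. A hedged sketch of that recording step in isolation (ids and names are made up):

// Sketch only: record one exported operator's origins.
void record_origin_example(luci::CircleExportMetadata &metadata)
{
  const uint32_t node_id = 7; // index of the operator just pushed to _operators

  // The operator was fused from two original (pre-optimization) nodes.
  metadata.add_source_table(1, "conv2d_1");
  metadata.add_source_table(2, "relu_1");
  metadata.add_op_table(node_id, 1);
  metadata.add_op_table(node_id, 2);
}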
namespace std
std::unordered_map<OpCode, uint32_t> _operator_codes;
std::vector<flatbuffers::Offset<circle::Buffer>> _buffers;
+ CircleExportMetadata _metadata;
// This is used for removing buffers with same values
std::map<luci::CircleConst *, uint32_t> _cached_buffer_id;
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TypeBridge.h"
-
-#include "CircleExporterUtils.h"
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Service/CircleTypeInference.h>
-#include <luci/Service/CircleShapeInference.h>
-
-#include <loco/Service/TypeInference.h>
-#include <loco/Service/ShapeInference.h>
-
-namespace
-{
-
-/**
- * @brief CopySelector will return condition of copy shape/type inference to node
- */
-struct CopySelector final : public luci::CircleNodeVisitor<bool>
-{
- // return false(don't copy) for nodes that provides shape/type from nature
- bool visit(const luci::CircleInput *) final { return false; }
- bool visit(const luci::CircleConst *) final { return false; }
-
- // default is copy attributes
- bool visit(const luci::CircleNode *) { return true; }
-};
-
-} // namespace
-
-namespace luci
-{
-
-loco::TensorShape node_shape(CircleNode *node)
-{
- loco::TensorShape shape;
-
- shape.rank(node->rank());
- for (uint32_t r = 0; r < node->rank(); ++r)
- {
- shape.dim(r) = loco::Dimension(node->dim(r).value());
- }
- return shape;
-}
-
-loco::DataType node_dtype(CircleNode *node) { return node->dtype(); }
-
-void copy_shape_dtype(loco::Graph *graph)
-{
- /**
- * @note We will iterate all the nodes in the graph to include dangle nodes
- */
- auto nodes = graph->nodes();
- for (uint32_t n = 0; n < nodes->size(); ++n)
- {
- auto node = loco::must_cast<luci::CircleNode *>(nodes->at(n));
-
- CopySelector cs;
- if (node->accept(&cs))
- {
- // NOTE not all nodes have infered shape/dtype: multiple outs may not be
- // visited when outputs are not used
- // TODO fix shape inference traversal
- // NOTE when loco supports multiple outputs in nature this issue should be
- // resolved also
-
- if (loco::dtype_known(node))
- {
- node->dtype(loco::dtype_get(node));
- }
-
- if (loco::shape_known(node))
- {
- auto shape = loco::shape_get(node).as<loco::TensorShape>();
- node->rank(shape.rank());
- for (uint32_t r = 0; r < shape.rank(); ++r)
- {
- node->dim(r) = loco::Dimension(shape.dim(r).value());
- }
-
- // ShapeStatus should be update only when the status was UNDEFINED
- if (node->shape_status() == ShapeStatus::UNDEFINED)
- node->shape_status(ShapeStatus::VALID);
- }
- }
- }
-}
-
-} // namespace luci
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __TYPE_BRIDGE_H__
-#define __TYPE_BRIDGE_H__
-
-#include <luci/IR/CircleNode.h>
-
-#include <loco.h>
-
-namespace luci
-{
-
-/**
- * @brief node_shape() will return loco::TensorShape of CircleNode
- */
-loco::TensorShape node_shape(CircleNode *node);
-
-/**
- * @brief node_dtype() will return loco::DataType of CircleNode
- */
-loco::DataType node_dtype(CircleNode *node);
-
-/**
- * @brief copy_shape_dtype() will copy shape and dtype inference data to CircleNode
- */
-void copy_shape_dtype(loco::Graph *graph);
-
-} // namespace luci
-
-#endif // __TYPE_BRIDGE_H__
target_include_directories(luci_import PRIVATE src)
target_include_directories(luci_import PUBLIC include)
target_link_libraries(luci_import PUBLIC luci_lang)
+target_link_libraries(luci_import PUBLIC luci_profile)
target_link_libraries(luci_import PUBLIC mio_circle)
target_link_libraries(luci_import PRIVATE luci_env)
target_link_libraries(luci_import PRIVATE luci_log)
#include <luci/IR/AttrPadding.h>
#include <luci/IR/CircleNode.h>
#include <luci/IR/CircleQuantParam.h>
-#include <luci/IR/CircleShapeSignature.h>
#include <luci/IR/SparsityParam.h>
#include <loco.h>
using CircleTensors_t = std::vector<std::unique_ptr<circle::TensorT>>;
using CircleOperators_t = std::vector<std::unique_ptr<circle::OperatorT>>;
using CircleOperatorCodes_t = std::vector<std::unique_ptr<circle::OperatorCodeT>>;
+ using CircleMetadata_t = std::vector<std::unique_ptr<circle::MetadataT>>;
using CircleSubGraphsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
using CircleTensorsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
const std::vector<int32_t> &inputs() const { return _current_subgraph->inputs; }
const std::vector<int32_t> &outputs() const { return _current_subgraph->outputs; }
const std::string &name() const { return _current_subgraph->name; }
+ const circle::DataFormat &data_format() const { return _current_subgraph->data_format; }
+ const CircleMetadata_t &metadata() const { return _model->metadata; }
const CircleTensorsPtr_t *tensors_ptr() const { return _tensors_ptr; }
public:
virtual ~GraphBuilder() = default;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+ // common validate helper that checks the input count and that there is a single output
+ bool validate(const ValidateArgs &args, size_t input_cnt) const
+ {
+ return (args.op.inputs.size() == input_cnt && args.op.outputs.size() == 1);
+ }
+
+ CircleNode *build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
private:
virtual CircleNode *build_node(const circle::OperatorT &op,
#include "GraphBuilderContext.h"
+#include <luci/IR/CircleNode.h>
+
#include <mio/circle/schema_generated.h>
namespace luci
};
virtual bool validate(const ValidateArgs &) const = 0;
- virtual void build(const circle::OperatorT &op, GraphBuilderContext *context) const = 0;
+ virtual CircleNode *build(const circle::OperatorT &op, GraphBuilderContext *context) const = 0;
virtual ~GraphBuilderBase() = default;
};
public:
GraphBuilderContext(loco::Graph *g, CircleReader *reader, IndexNodeFinder *nodefinder,
IndexTensorOutputs *tensoroutputs)
- : _g(g), _reader(reader), _indexnodefinder(nodefinder), _indextensoroutputs(tensoroutputs)
+ : _g(g), _reader(reader), _indexnodefinder(nodefinder), _indextensoroutputs(tensoroutputs)
{
// DO NOTHING
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_GRAPH_BUILDER_MULTI_OUTPUT_H__
+#define __LUCI_IMPORT_GRAPH_BUILDER_MULTI_OUTPUT_H__
+
+#include "GraphBuilderContext.h"
+#include "GraphBuilderBase.h"
+
+#include <mio/circle/schema_generated.h>
+
+namespace luci
+{
+
+/**
+ * @brief Base class for general multiple-output graph builders (e.g., CircleIfGraphBuilder)
+ */
+class GraphBuilderMultiOutput : public GraphBuilderBase
+{
+public:
+ virtual ~GraphBuilderMultiOutput() = default;
+
+ CircleNode *build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+
+protected:
+ struct BuildNodeArgs
+ {
+ BuildNodeArgs(const circle::OperatorT &o, GraphBuilderContext *c,
+ const std::vector<CircleNode *> &i)
+ : op(o), context(c), input_nodes(i)
+ {
+ }
+
+ const circle::OperatorT &op;
+ GraphBuilderContext *context;
+ const std::vector<CircleNode *> &input_nodes;
+ };
+
+ struct BuildOutArgs
+ {
+ BuildOutArgs(CircleNode *nd, uint32_t n) : node(nd), index(n) {}
+
+ CircleNode *node;
+ uint32_t index;
+ };
+
+private:
+ virtual CircleNode *build_node(const BuildNodeArgs &) const = 0;
+ virtual CircleNode *build_out(const BuildOutArgs &) const = 0;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_GRAPH_BUILDER_MULTI_OUTPUT_H__
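With this base class, a multi-output operator importer only supplies build_node and build_out; the shared build() handles input wiring and the per-output virtual nodes. A hypothetical builder for a two-output op might look as follows (CircleFoo/CircleFooOut are illustrative names, and the GraphBuilderContext::graph() and loco::Node::graph() accessors are assumed):

// Hypothetical example mirroring the builders converted in this patch
// (e.g. the Unique or TopKV2 builders). Not part of the change itself.
class CircleFooGraphBuilder : public luci::GraphBuilderMultiOutput
{
public:
  bool validate(const ValidateArgs &args) const final
  {
    return args.op.inputs.size() == 1 && args.op.outputs.size() == 2;
  }

private:
  luci::CircleNode *build_node(const BuildNodeArgs &bna) const final
  {
    auto *node = bna.context->graph()->nodes()->create<luci::CircleFoo>();
    node->input(bna.input_nodes[0]);
    return node;
  }

  luci::CircleNode *build_out(const BuildOutArgs &boa) const final
  {
    auto *out = boa.node->graph()->nodes()->create<luci::CircleFooOut>();
    out->input(boa.node);
    out->index(boa.index);
    return out;
  }
};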
#include "Nodes/CircleBatchToSpaceND.h"
#include "Nodes/CircleBCQFullyConnected.h"
#include "Nodes/CircleBCQGather.h"
+#include "Nodes/CircleBidirectionalSequenceLSTM.h"
#include "Nodes/CircleCast.h"
#include "Nodes/CircleCeil.h"
#include "Nodes/CircleConcatenation.h"
#include "Nodes/CircleEqual.h"
#include "Nodes/CircleExp.h"
#include "Nodes/CircleExpandDims.h"
+#include "Nodes/CircleFakeQuant.h"
#include "Nodes/CircleFill.h"
#include "Nodes/CircleFloor.h"
#include "Nodes/CircleFloorDiv.h"
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_BIDIRECTIONALSEQUENCE_LSTM_H__
+#define __LUCI_IMPORT_OP_CIRCLE_BIDIRECTIONALSEQUENCE_LSTM_H__
+
+#include "luci/Import/GraphBuilderMultiOutput.h"
+
+namespace luci
+{
+
+class CircleBidirectionalSequenceLSTMGraphBuilder : public GraphBuilderMultiOutput
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_BIDIRECTIONALSEQUENCE_LSTM_H__
#ifndef __LUCI_IMPORT_OP_CIRCLE_CUSTOM_H__
#define __LUCI_IMPORT_OP_CIRCLE_CUSTOM_H__
-#include "luci/Import/GraphBuilder.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleCustomGraphBuilder : public GraphBuilderBase
+class CircleCustomGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_FAKE_QUANT_H__
+#define __LUCI_IMPORT_OP_CIRCLE_FAKE_QUANT_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleFakeQuantGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_FAKE_QUANT_H__
#ifndef __LUCI_IMPORT_OP_CIRCLE_IF_H__
#define __LUCI_IMPORT_OP_CIRCLE_IF_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleIfGraphBuilder : public GraphBuilderBase
+class CircleIfGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
#ifndef __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
#define __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleNonMaxSuppressionV4GraphBuilder : public GraphBuilderBase
+class CircleNonMaxSuppressionV4GraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
#ifndef __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
#define __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleNonMaxSuppressionV5GraphBuilder : public GraphBuilderBase
+class CircleNonMaxSuppressionV5GraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
#ifndef __LUCI_IMPORT_OP_CIRCLE_SPLIT_H__
#define __LUCI_IMPORT_OP_CIRCLE_SPLIT_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleSplitGraphBuilder : public GraphBuilderBase
+class CircleSplitGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
#ifndef __LUCI_IMPORT_OP_CIRCLE_SPLIT_V_H__
#define __LUCI_IMPORT_OP_CIRCLE_SPLIT_V_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleSplitVGraphBuilder : public GraphBuilderBase
+class CircleSplitVGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
#ifndef __LUCI_IMPORT_OP_CIRCLE_TOPK_V2_H__
#define __LUCI_IMPORT_OP_CIRCLE_TOPK_V2_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleTopKV2GraphBuilder : public GraphBuilderBase
+class CircleTopKV2GraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
#ifndef __LUCI_IMPORT_OP_CIRCLE_UNIQUE_H__
#define __LUCI_IMPORT_OP_CIRCLE_UNIQUE_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleUniqueGraphBuilder : public GraphBuilderBase
+class CircleUniqueGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
#ifndef __LUCI_IMPORT_OP_CIRCLE_UNPACK_H__
#define __LUCI_IMPORT_OP_CIRCLE_UNPACK_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleUnpackGraphBuilder : public GraphBuilderBase
+class CircleUnpackGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+ CircleNode *build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
};
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleImportMetadata.h"
+
+#include <vector>
+
+namespace
+{
+
+uint32_t read_u32(const std::vector<uint8_t> &buffer, uint32_t idx)
+{
+ uint32_t val = 0;
+ val += (buffer.at(idx + 0) << 0 * 8);
+ val += (buffer.at(idx + 1) << 1 * 8);
+ val += (buffer.at(idx + 2) << 2 * 8);
+ val += (buffer.at(idx + 3) << 3 * 8);
+ return val;
+}
+
+} // namespace
+
+namespace
+{
+
+// 'source_table' is decoded to std::map<uint32_t, std::string> format.
+const std::map<uint32_t, std::string>
+decoded_source_table(const std::vector<uint8_t> &source_table_data)
+{
+ std::map<uint32_t, std::string> source_id_name_map;
+ uint32_t idx = 0;
+
+ if (source_table_data.size() < 4)
+ throw std::runtime_error("Source table decode error : invalid entry number");
+
+ uint32_t entry_number = read_u32(source_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ while (idx < source_table_data.size())
+ {
+ if (idx + 2 * sizeof(uint32_t) > source_table_data.size())
+ throw std::runtime_error("Source table decode error : invalid entry item");
+
+ uint32_t id = read_u32(source_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ uint32_t length = read_u32(source_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ if (idx + sizeof(char) * length > source_table_data.size())
+ throw std::runtime_error("Source table decode error : invalid entry data");
+
+ // The last character of name is '\0'.
+ // However, as std::string does not use '\0' to find the end of the string,
+ // we ignore the character and do not include it in the string.
+ std::string origin_name;
+ for (uint32_t j = 0; j < length - 1; ++j)
+ origin_name += source_table_data.at(idx + j);
+ assert(source_table_data.at(idx + length - 1) == '\0');
+ idx += sizeof(char) * length;
+
+ if (source_id_name_map.insert({id, origin_name}).second == false)
+ throw std::runtime_error("Source table decode error : duplicated origin ID");
+ }
+
+ if (idx != source_table_data.size())
+ throw std::runtime_error("Source table decode error : data size invalid");
+
+ if (source_id_name_map.size() != entry_number)
+ throw std::runtime_error("Source table decode error : result size mismatch");
+
+ return source_id_name_map;
+}
+
+// 'op_table' is decoded to std::map<uint32_t, std::set<uint32_t>> format.
+const std::map<uint32_t, std::set<uint32_t>>
+decoded_op_table(const std::vector<uint8_t> &op_table_data)
+{
+ std::map<uint32_t, std::set<uint32_t>> node_source_ids_map;
+ uint32_t idx = 0;
+
+ if (op_table_data.size() < 4)
+ throw std::runtime_error("Op table decode error : invalid entry number");
+
+ uint32_t entry_number = read_u32(op_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ while (idx < op_table_data.size())
+ {
+ if (idx + 2 * sizeof(uint32_t) > op_table_data.size())
+ throw std::runtime_error("Op table decode error : invalid entry item");
+
+ uint32_t id = read_u32(op_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ uint32_t node_num = read_u32(op_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ if (idx + sizeof(uint32_t) * node_num > op_table_data.size())
+ throw std::runtime_error("Op table decode error : invalid entry data");
+
+ std::set<uint32_t> source_ids;
+ for (uint32_t j = 0; j < node_num; ++j)
+ {
+ uint32_t origin = read_u32(op_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ source_ids.insert(origin);
+ }
+
+ if (node_source_ids_map.insert({id, source_ids}).second == false)
+ throw std::runtime_error("Op table decode error : duplicated origin ID");
+ }
+
+ if (idx != op_table_data.size())
+ throw std::runtime_error("Op table decode error : data size invalid");
+
+ if (node_source_ids_map.size() != entry_number)
+ throw std::runtime_error("Op table decode error : entry number invalid");
+
+ return node_source_ids_map;
+}
+
+} // namespace
+
+namespace luci
+{
+
+CircleImportMetadata::CircleImportMetadata(const luci::CircleReader &reader)
+{
+ const auto &metadata = reader.metadata();
+ for (uint32_t i = 0; i < metadata.size(); ++i)
+ {
+ const circle::MetadataT &meta = *metadata[i];
+
+ assert(meta.buffer < reader.buffers().size());
+ const std::vector<uint8_t> &buffer = reader.buffers()[meta.buffer]->data;
+
+ if (meta.name.compare("ONE_op_table") == 0)
+ _op_table = decoded_op_table(buffer);
+ else if (meta.name.compare("ONE_source_table") == 0)
+ _source_table = decoded_source_table(buffer);
+ }
+}
+
+const OriginTable CircleImportMetadata::origin_table(void)
+{
+ OriginTable origin_table;
+
+ if (_op_table.size() > 0 && _source_table.size() > 0)
+ {
+ for (auto &kv : _op_table)
+ {
+ const auto node_id = kv.first;
+ const auto &source_ids = kv.second;
+
+ std::vector<std::shared_ptr<CircleNodeOrigin>> origins;
+ for (auto source_id : source_ids)
+ {
+ const auto source_name = _source_table.at(source_id);
+ origins.push_back(single_origin(source_id, source_name));
+ }
+
+ auto origin = composite_origin(origins);
+ origin_table.emplace(node_id, origin);
+ }
+ }
+
+ return origin_table;
+}
+
+} // namespace luci
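For reference, the byte layout that decoded_source_table() and decoded_op_table() above expect uses little-endian u32 fields: ONE_source_table is [entry_count] followed by repeated [source_id][name_length][name bytes ending in '\0'], and ONE_op_table is [entry_count] followed by repeated [node_id][origin_count][origin_id]*. The sketch below is illustrative only (it is not part of this patch and the helper names are made up); it shows an encoder that produces a buffer the source-table decoder accepts.

#include <cstdint>
#include <map>
#include <string>
#include <vector>

// Append a uint32_t in little-endian order, mirroring read_u32() above.
static void append_u32(std::vector<uint8_t> &buf, uint32_t v)
{
  for (int i = 0; i < 4; ++i)
    buf.push_back(static_cast<uint8_t>((v >> (8 * i)) & 0xFF));
}

// Layout: [entry_count:u32] then, per entry, [source_id:u32][name_len:u32][name bytes + '\0'].
std::vector<uint8_t> encode_source_table(const std::map<uint32_t, std::string> &table)
{
  std::vector<uint8_t> buf;
  append_u32(buf, static_cast<uint32_t>(table.size()));
  for (const auto &kv : table)
  {
    append_u32(buf, kv.first);
    append_u32(buf, static_cast<uint32_t>(kv.second.size() + 1)); // length counts the trailing '\0'
    for (char c : kv.second)
      buf.push_back(static_cast<uint8_t>(c));
    buf.push_back(0);
  }
  return buf;
}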
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_IMPORT_METADATA_H__
+#define __LUCI_CIRCLE_IMPORT_METADATA_H__
+
+#include "luci/Import/CircleReader.h"
+
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <map>
+#include <set>
+#include <string>
+
+namespace luci
+{
+
+using OriginTable = std::map<uint32_t, std::shared_ptr<CircleNodeOrigin>>;
+
+class CircleImportMetadata
+{
+public:
+ CircleImportMetadata() = delete;
+
+ CircleImportMetadata(const luci::CircleReader &reader);
+
+public:
+ /**
+ * @brief Create origin table using _source_table and _op_table in CircleImportMetadata
+ * @note To create the origin table, both _op_table and _source_table must exist.
+ * If either is missing, an empty table is returned.
+ */
+ const OriginTable origin_table(void);
+
+private:
+ // Decoded metadata is stored
+ std::map<uint32_t, std::string> _source_table;
+ std::map<uint32_t, std::set<uint32_t>> _op_table;
+};
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_IMPORT_METADATA_H__
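A minimal usage sketch of the new class, assuming a valid luci::CircleReader is at hand (illustrative only; the free function below is not part of this patch):

#include "CircleImportMetadata.h"

#include <cstdint>
#include <memory>

// Build the origin table once and look up the origin recorded for a given operator index.
void lookup_origin_example(const luci::CircleReader &reader)
{
  luci::CircleImportMetadata metadata(reader);
  const luci::OriginTable origins = metadata.origin_table(); // empty if either table is missing

  const uint32_t node_id = 0; // illustrative operator index
  auto it = origins.find(node_id);
  if (it != origins.end())
  {
    std::shared_ptr<luci::CircleNodeOrigin> origin = it->second;
    // The Importer.cpp hunk below attaches this via add_origin(built_op, origin).
    (void)origin;
  }
}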
case circle::SparseIndexVector_Int32Vector:
{
const auto const_vec_ptr =
- static_cast<const void *>(&(sparse_index_vector.AsInt32Vector()->values));
+ static_cast<const void *>(&(sparse_index_vector.AsInt32Vector()->values));
return SparseIndexVector{SparseIndexVectorType::I32, const_vec_ptr};
}
case circle::SparseIndexVector_Uint16Vector:
{
const auto const_vec_ptr =
- static_cast<const void *>(&(sparse_index_vector.AsUint16Vector()->values));
+ static_cast<const void *>(&(sparse_index_vector.AsUint16Vector()->values));
return SparseIndexVector{SparseIndexVectorType::U16, const_vec_ptr};
}
case circle::SparseIndexVector_Uint8Vector:
{
const auto const_vec_ptr =
- static_cast<const void *>(&(sparse_index_vector.AsUint8Vector()->values));
+ static_cast<const void *>(&(sparse_index_vector.AsUint8Vector()->values));
return SparseIndexVector{SparseIndexVectorType::U8, const_vec_ptr};
}
default:
node->name(tensor_name(tensor));
node->dtype(luci_datatype(tensor.type));
+ assert(tensor.shape_signature.size() == 0 ||
+ tensor.shape_signature.size() == tensor.shape.size());
+
std::vector<int32_t> dims = tensor.shape; // in NHWC
node->rank(dims.size());
for (uint32_t r = 0; r < dims.size(); ++r)
{
- node->dim(r) = loco::Dimension(dims[r]);
+ if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1)
+ node->dim(r).unset();
+ else
+ node->dim(r).set(dims[r]);
}
- node->shape_signature(tensor.shape_signature);
-
const auto *quantization = tensor.quantization.get();
if (quantization != nullptr)
{
namespace luci
{
-void GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
+CircleNode *GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
{
LOGGER(l);
else
{
// If there is no tensor, insert CircleOutputExclude.
- input_nodes.push_back(context->graph()->nodes()->create<luci::CircleOutputExclude>());
+ auto *node = context->graph()->nodes()->create<luci::CircleOutputExclude>();
+ // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
+ // a dummy type is inserted.
+ node->dtype(loco::DataType::FLOAT32);
+ input_nodes.push_back(node);
}
}
{
context->nodefinder()->enroll(outputs[0], node);
}
+
+ return node;
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/GraphBuilderMultiOutput.h"
+
+#include <luci/Log.h>
+
+namespace luci
+{
+
+CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op,
+ GraphBuilderContext *context) const
+{
+ LOGGER(l);
+
+ assert(context != nullptr);
+
+ const std::vector<int32_t> &inputs = op.inputs;
+ const std::vector<int32_t> &outputs = op.outputs;
+ const auto &tensors = context->reader()->tensors();
+ const auto &opcodes = context->reader()->opcodes();
+ auto tensors_ptr = context->reader()->tensors_ptr();
+ assert(tensors_ptr != nullptr);
+
+ std::vector<CircleNode *> input_nodes;
+ for (const int32_t input_tensor_index : inputs)
+ {
+ if (input_tensor_index >= 0)
+ {
+ auto input = context->nodefinder()->node(input_tensor_index);
+ if (input == nullptr)
+ INFO(l) << "[luci] Warning: input node is null " << input_tensor_index << std::endl;
+ input_nodes.push_back(input);
+ }
+ else
+ {
+ // If there is no tensor, insert CircleOutputExclude.
+ auto *node = context->graph()->nodes()->create<luci::CircleOutputExclude>();
+ // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
+ // a dummy type is inserted.
+ node->dtype(loco::DataType::FLOAT32);
+ input_nodes.push_back(node);
+ }
+ }
+
+ BuildNodeArgs bna(op, context, input_nodes);
+ auto *node = build_node(bna);
+
+ uint32_t output_count = outputs.size();
+ assert(output_count > 0);
+ {
+ // Let's use attributes from output 0 for this node
+ const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ node->name(tensor_name(output_tensor));
+ node->dtype(luci_datatype(output_tensor.type));
+
+ // mark operator version
+ node->op_version(opcodes[op.opcode_index].get()->version);
+
+ // NOTE We don't set quantization on the multi-output node itself but on its virtual outputs
+ }
+
+ // Create a virtual output node for each output of this operator
+ for (uint32_t n = 0; n < output_count; ++n)
+ {
+ const circle::TensorT &output_tensor = *tensors[outputs[n]];
+
+ BuildOutArgs boa(node, n);
+ auto *nodeout = build_out(boa);
+
+ copy_tensor_attributes(output_tensor, nodeout);
+ // mark shape_status
+ if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
+ nodeout->shape_status(ShapeStatus::NOSHAPE);
+ else
+ nodeout->shape_status(ShapeStatus::VALID);
+
+ context->nodefinder()->enroll(outputs[n], nodeout);
+ }
+
+ return node;
+}
+
+} // namespace luci
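Under the new base class, a multi-output builder only supplies build_node() and build_out(); the common input resolution, naming, op_version and shape_status handling above is shared. A minimal derived builder looks roughly like the following sketch (CircleFoo and CircleFooOut are placeholder names, not types introduced by this patch):

class CircleFooGraphBuilder : public GraphBuilderMultiOutput
{
public:
  bool validate(const ValidateArgs &args) const final
  {
    // e.g. one input, two outputs
    return args.op.inputs.size() == 1 && args.op.outputs.size() == 2;
  }

private:
  // Create the operator node itself from the already-resolved input nodes.
  CircleNode *build_node(const BuildNodeArgs &bna) const final
  {
    auto *node = bna.context->graph()->nodes()->create<CircleFoo>();
    node->input(bna.input_nodes.at(0));
    return node;
  }

  // Create the index-th virtual output; the base class copies tensor attributes onto it.
  CircleNode *build_out(const BuildOutArgs &boa) const final
  {
    auto *nodeout = boa.node->graph()->nodes()->create<CircleFooOut>();
    nodeout->input(boa.node);
    nodeout->index(boa.index);
    return nodeout;
  }
};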
CIRCLE_NODE(BATCH_TO_SPACE_ND, CircleBatchToSpaceNDGraphBuilder); // 37
CIRCLE_NODE(BCQ_FULLY_CONNECTED, CircleBCQFullyConnectedGraphBuilder); // 253
CIRCLE_NODE(BCQ_GATHER, CircleBCQGatherGraphBuilder); // 252
+ CIRCLE_NODE(BIDIRECTIONAL_SEQUENCE_LSTM, CircleBidirectionalSequenceLSTMGraphBuilder); // 52
CIRCLE_NODE(CAST, CircleCastGraphBuilder); // 53
CIRCLE_NODE(CEIL, CircleCeilGraphBuilder); // 104
CIRCLE_NODE(CUSTOM, CircleCustomGraphBuilder); // 32
CIRCLE_NODE(EQUAL, CircleEqualGraphBuilder); // 71
CIRCLE_NODE(EXP, CircleExpGraphBuilder); // 47
CIRCLE_NODE(EXPAND_DIMS, CircleExpandDimsGraphBuilder); // 70
+ CIRCLE_NODE(FAKE_QUANT, CircleFakeQuantGraphBuilder); // 80
CIRCLE_NODE(FILL, CircleFillGraphBuilder); // 94
CIRCLE_NODE(FLOOR, CircleFloorGraphBuilder); // 8
CIRCLE_NODE(FLOOR_DIV, CircleFloorDivGraphBuilder); // 90
// BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35,
// BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46,
// BuiltinOperator_DELEGATE = 51,
- // BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52,
// BuiltinOperator_ARG_MAX = 56,
- // BuiltinOperator_FAKE_QUANT = 80,
// BuiltinOperator_QUANTIZE = 114,
// BuiltinOperator_HARD_SWISH = 117,
// BuiltinOperator_DENSIFY = 124,
*/
#include "luci/Importer.h"
+#include "CircleImportMetadata.h"
#include "PostImport.h"
#include "luci/Import/GraphBuilder.h"
#include <luci/IR/Module.h>
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeID.h>
+#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Log.h>
#include <luci/LogHelper.h>
const auto &tensors = reader.tensors();
auto tensors_ptr = reader.tensors_ptr();
assert(tensors_ptr != nullptr);
+ auto circle_metadata = std::make_unique<luci::CircleImportMetadata>(reader);
// build a cache to identify if a tensor is output of an operator
// if this is set, we should not create a CircleConst for this tensor
// Data type
graph_input->dtype(input_node->dtype());
+ assert(tensor.shape_signature.size() == 0 ||
+ tensor.shape_signature.size() == tensor.shape.size());
+
// Shape of GraphInput
auto input_shape = std::make_unique<loco::TensorShape>();
const std::vector<int32_t> &input_dims = tensor.shape; // in NHWC
input_shape->rank(input_dims.size());
for (uint32_t r = 0; r < input_dims.size(); ++r)
- input_shape->dim(r) = loco::Dimension(input_dims[r]);
+ {
+ if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1)
+ input_shape->dim(r).unset();
+ else
+ input_shape->dim(r).set(input_dims[r]);
+ }
graph_input->shape(std::move(input_shape));
}
// Note that operators in model are stored in execution order. This means that when importing
// an operator, its input operators have already been imported. We exploit this fact to set up
// node's inputs right after creating the node.
+ auto origin_table = circle_metadata->origin_table();
for (uint32_t i = 0; i < operators.size(); ++i)
{
const circle::OperatorT &op = *operators[i];
throw oops::UserExn("Invalid operator", reader.opcode_name(op));
}
- builder->build(op, &gb_context);
+ auto built_op = builder->build(op, &gb_context);
+ set_node_id(built_op, i);
+ if (origin_table.find(i) != origin_table.end())
+ add_origin(built_op, origin_table.at(i));
+ else
+ add_origin(built_op, luci::single_origin(i, built_op->name()));
}
else
{
// set the graph output name and node object
auto graph_output = graph->outputs()->create();
std::string tname = luci::tensor_name(tensor);
- graph_output->name("output_" + tname);
+ assert(tname.length() > 0);
+ graph_output->name(tname);
luci::copy_tensor_attributes(tensor, output_node);
// Set GraphInputOutputIndex for graph
output_node->index(graph_output->index());
+ assert(tensor.shape_signature.size() == 0 ||
+ tensor.shape_signature.size() == tensor.shape.size());
+
// Shape of Output
auto output_shape = std::make_unique<loco::TensorShape>();
const std::vector<int32_t> &output_dims = tensor.shape; // in NHWC
output_shape->rank(output_dims.size());
for (uint32_t r = 0; r < output_dims.size(); ++r)
- output_shape->dim(r) = loco::Dimension(output_dims[r]);
+ {
+ if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1)
+ output_shape->dim(r).unset();
+ else
+ output_shape->dim(r).set(output_dims[r]);
+ }
graph_output->shape(std::move(output_shape));
// Data type
{
bool CircleAbsGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO Support type check
- return true;
+ return GraphBuilder::validate(args, 1);
}
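The repeated input/output size checks in the validate() hunks here are folded into GraphBuilder::validate(args, n). Its definition is not shown in this section; presumably it is equivalent to the following sketch (n inputs, exactly one output):

bool GraphBuilder::validate(const ValidateArgs &args, size_t input_cnt) const
{
  return args.op.inputs.size() == input_cnt && args.op.outputs.size() == 1;
}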
CircleNode *CircleAbsGraphBuilder::build_node(const circle::OperatorT &,
bool CircleAddGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleAddGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleArgMaxGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleArgMaxGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleArgMinGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleArgMinGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleAveragePool2DGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleAveragePool2DGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleBCQFullyConnectedGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 5)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 5);
}
CircleNode *CircleBCQFullyConnectedGraphBuilder::build_node(const circle::OperatorT &op,
node->bias(inputs.at(3));
node->weights_clusters(inputs.at(4));
- // TODO Find and move to appropriate place for setting optional input
- if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
- {
- // bias is not used for type inference, but node itself should have a type
- bias->dtype(loco::DataType::FLOAT32);
-
- // bias is not used for shape inference
- }
-
const auto *options = op.builtin_options.AsBCQFullyConnectedOptions();
node->weights_hidden_size(options->weights_hidden_size);
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
bool CircleBCQGatherGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 4)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 4);
}
CircleNode *CircleBCQGatherGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleBatchMatMulGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleBatchMatMulGraphBuilder::build_node(const circle::OperatorT &op,
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleBidirectionalSequenceLSTM.h"
+
+#include <luci/IR/Nodes/CircleBidirectionalSequenceLSTM.h>
+#include <luci/IR/Nodes/CircleBidirectionalSequenceLSTMOut.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleBidirectionalSequenceLSTMGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 48)
+ return false;
+ if (args.op.outputs.size() != 2)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleBidirectionalSequenceLSTMGraphBuilder::build_node(const BuildNodeArgs &bna) const
+{
+ auto *node = bna.context->graph()->nodes()->create<CircleBidirectionalSequenceLSTM>();
+ auto &inputs = bna.input_nodes;
+ node->input(inputs.at(0));
+ node->fw_input_to_input_weights(inputs.at(1)); // Optional
+ node->fw_input_to_cell_weights(inputs.at(2));
+ node->fw_input_to_forget_weights(inputs.at(3));
+ node->fw_input_to_output_weights(inputs.at(4));
+ node->fw_recurrent_to_input_weights(inputs.at(5)); // Optional
+ node->fw_recurrent_to_cell_weights(inputs.at(6));
+ node->fw_recurrent_to_forget_weights(inputs.at(7));
+ node->fw_recurrent_to_output_weights(inputs.at(8));
+ node->fw_cell_to_input_weights(inputs.at(9)); // Optional
+ node->fw_cell_to_forget_weights(inputs.at(10)); // Optional
+ node->fw_cell_to_output_weights(inputs.at(11)); // Optional
+ node->fw_input_gate_bias(inputs.at(12)); // Optional
+ node->fw_forget_gate_bias(inputs.at(13));
+ node->fw_cell_gate_bias(inputs.at(14));
+ node->fw_output_gate_bias(inputs.at(15));
+ node->fw_projection_weights(inputs.at(16)); // Optional
+ node->fw_projection_bias(inputs.at(17)); // Optional
+ node->bw_input_to_input_weights(inputs.at(18)); // Optional
+ node->bw_input_to_cell_weights(inputs.at(19));
+ node->bw_input_to_forget_weights(inputs.at(20));
+ node->bw_input_to_output_weights(inputs.at(21));
+ node->bw_recurrent_to_input_weights(inputs.at(22)); // Optional
+ node->bw_recurrent_to_cell_weights(inputs.at(23));
+ node->bw_recurrent_to_forget_weights(inputs.at(24));
+ node->bw_recurrent_to_output_weights(inputs.at(25));
+ node->bw_cell_to_input_weights(inputs.at(26)); // Optional
+ node->bw_cell_to_forget_weights(inputs.at(27)); // Optional
+ node->bw_cell_to_output_weights(inputs.at(28)); // Optional
+ node->bw_input_gate_bias(inputs.at(29)); // Optional
+ node->bw_forget_gate_bias(inputs.at(30));
+ node->bw_cell_gate_bias(inputs.at(31));
+ node->bw_output_gate_bias(inputs.at(32));
+ node->bw_projection_weights(inputs.at(33)); // Optional
+ node->bw_projection_bias(inputs.at(34)); // Optional
+ node->fw_activation_state(inputs.at(35));
+ node->fw_cell_state(inputs.at(36));
+ node->bw_activation_state(inputs.at(37));
+ node->bw_cell_state(inputs.at(38));
+
+ node->auxillary_input(inputs.at(39)); // Optional
+ node->fw_auxillary_input_to_input_weights(inputs.at(40)); // Optional
+ node->fw_auxillary_input_to_forget_weights(inputs.at(41)); // Optional
+ node->fw_auxillary_input_to_cell_weights(inputs.at(42)); // Optional
+ node->fw_auxillary_input_to_output_weights(inputs.at(43)); // Optional
+ node->bw_auxillary_input_to_input_weights(inputs.at(44)); // Optional
+ node->bw_auxillary_input_to_forget_weights(inputs.at(45)); // Optional
+ node->bw_auxillary_input_to_cell_weights(inputs.at(46)); // Optional
+ node->bw_auxillary_input_to_output_weights(inputs.at(47)); // Optional
+
+ const auto *options = bna.op.builtin_options.AsBidirectionalSequenceLSTMOptions();
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+ node->cell_clip(options->cell_clip);
+ node->proj_clip(options->proj_clip);
+ node->merge_outputs(options->merge_outputs);
+ node->time_major(options->time_major);
+ node->asymmetric_quantize_inputs(options->asymmetric_quantize_inputs);
+
+ return node;
+}
+
+CircleNode *CircleBidirectionalSequenceLSTMGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleBidirectionalSequenceLSTMOut>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
+}
+
+} // namespace luci
{
LOGGER(l);
+ if (!GraphBuilder::validate(args, 1))
+ return false;
+
auto settings = luci::UserSettings::settings();
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
- return false;
// NOTE real models do have type mismatch
const auto *options = args.op.builtin_options.AsCastOptions();
bool CircleCeilGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
- return false;
-
// TODO dtype check
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleCeilGraphBuilder::build_node(const circle::OperatorT &,
bool CircleConv2DGraphBuilder::validate(const ValidateArgs &args) const
{
// Circle Conv2D may not have a bias but we won't support this
- if (args.op.inputs.size() != 3)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleConv2DGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleCosGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleCosGraphBuilder::build_node(const circle::OperatorT &,
return true;
}
-void CircleCustomGraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleCustomGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ uint32_t input_count = bna.op.inputs.size();
+ uint32_t output_count = bna.op.outputs.size();
- auto graph = context->graph();
+ auto *node = bna.context->graph()->nodes()->create<CircleCustom>(input_count, output_count);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
+ for (uint32_t idx = 0; idx < input_count; ++idx)
+ {
+ node->inputs(idx, bna.input_nodes[idx]);
+ }
- // Create CircleCustom
- const auto &opcodes = context->reader()->opcodes();
- const uint32_t opcode_index = op.opcode_index;
+ const auto &opcodes = bna.context->reader()->opcodes();
+ const uint32_t opcode_index = bna.op.opcode_index;
const circle::OperatorCodeT &opcode = *opcodes[opcode_index];
- auto *node = graph->nodes()->create<CircleCustom>(inputs.size());
- uint32_t input_idx = 0;
- for (const int32_t input_tensor_index : inputs)
- {
- node->inputs(input_idx++, context->nodefinder()->node(input_tensor_index));
- }
- node->custom_options(std::vector<uint8_t>{op.custom_options.begin(), op.custom_options.end()});
+ node->custom_options(
+ std::vector<uint8_t>{bna.op.custom_options.begin(), bna.op.custom_options.end()});
node->custom_code(opcode.custom_code);
- // Operator version of custom is always 1, so do nothing
- uint32_t output_count = outputs.size();
+ // NOTE Operator version of custom is always 1
- assert(output_count > 0);
- {
- // Let's use attributes from output 0 for this node
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->dtype(luci_datatype(output_tensor.type));
- }
-
- // Create virtual outputs of Custom
- for (uint32_t n = 0; n < output_count; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ return node;
+}
- auto *nodeout = graph->nodes()->create<CircleCustomOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
+CircleNode *CircleCustomGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleCustomOut>();
- nodeout->input(node);
- nodeout->index(n);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
bool CircleDepthToSpaceGraphBuilder::validate(const ValidateArgs &args) const
{
+ if (!GraphBuilder::validate(args, 1))
+ return false;
+
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
const auto *options = args.op.builtin_options.AsDepthToSpaceOptions();
-
- if (inputs.size() != 1)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
const auto &tensors = args.reader.tensors();
if (tensors[outputs[0]]->type != tensors[inputs.at(0)]->type)
if (args.op.outputs.size() != 1)
return false;
+ const auto &tensors = args.reader.tensors();
+
+ // input shape
+ const auto &input = tensors.at(args.op.inputs.at(0));
+ const auto &input_shape = input->shape;
+
+ // input shape must be rank 4
+ if (input_shape.size() != 4)
+ return false;
+
+ // filter shape
+ const auto &filter = tensors.at(args.op.inputs.at(1));
+ const auto &filter_shape = filter->shape;
+
+ // filter shape must be rank 4
+ if (filter_shape.size() != 4)
+ return false;
+
+ // multiplier
+ const auto *options = args.op.builtin_options.AsDepthwiseConv2DOptions();
+ const auto &multiplier = options->depth_multiplier;
+
+ // The filter is represented as [1, H, W, C*M], where M is the depth multiplier.
+ if (filter_shape.at(3) != input_shape.at(3) * multiplier)
+ return false;
+
return true;
}
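As a concrete instance of the new depthwise check: an input of shape [1, H, W, 8] with depth_multiplier 2 requires a filter of shape [1, kH, kW, 16]. A tiny standalone sketch of the same arithmetic (names are illustrative, not part of this patch):

#include <cstdint>

// Mirrors the channel check added above: filter channels must equal input channels * multiplier.
bool depthwise_filter_channels_ok(int32_t input_channels, int32_t multiplier, int32_t filter_channels)
{
  return filter_channels == input_channels * multiplier;
}

// depthwise_filter_channels_ok(8, 2, 16) -> true
// depthwise_filter_channels_ok(8, 2, 12) -> false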
bool CircleDequantizeGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleDequantizeGraphBuilder::build_node(const circle::OperatorT &,
bool CircleDivGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleDivGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleEluGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
- if (outputs.size() != 1)
- return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor = tensors.at(inputs.at(0));
bool CircleEqualGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
+ const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
return tensors[inputs.at(0)]->type == tensors[inputs.at(1)]->type;
bool CircleExpGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
// input type check
const auto &tensors = args.reader.tensors();
const auto &tensor = tensors.at(inputs.at(0));
bool CircleExpandDimsGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
+ const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
return tensors[inputs.at(1)]->type == circle::TensorType_INT32;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleFakeQuant.h"
+
+#include <luci/IR/Nodes/CircleFullyConnected.h>
+#include <luci/IR/Nodes/CircleOutput.h>
+
+#include <loco.h>
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+bool CircleFakeQuantGraphBuilder::validate(const ValidateArgs &args) const
+{
+ return GraphBuilder::validate(args, 1);
+}
+
+CircleNode *CircleFakeQuantGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleFakeQuant>();
+ node->inputs(inputs.at(0));
+
+ const auto *options = op.builtin_options.AsFakeQuantOptions();
+ node->min(options->min);
+ node->max(options->max);
+ node->num_bits(options->num_bits);
+ node->narrow_range(options->narrow_range);
+
+ return node;
+}
+
+} // namespace luci
bool CircleFillGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleFillGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleFloorGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
- return false;
-
// TODO dtype check
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleFloorGraphBuilder::build_node(const circle::OperatorT &,
bool CircleFloorDivGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
-
- if (outputs.size() != 1)
- {
- return false;
- }
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_in_0 = tensors.at(inputs.at(0));
const auto &tensor_in_1 = tensors.at(inputs.at(1));
bool CircleFloorModGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_in_0 = tensors.at(inputs.at(0));
const auto &tensor_in_1 = tensors.at(inputs.at(1));
bool CircleFullyConnectedGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleFullyConnectedGraphBuilder::build_node(const circle::OperatorT &op,
node->weights(inputs.at(1));
node->bias(inputs.at(2)); // bias is optional
- // TODO Find and move to appropriate place for setting optional input
- if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
- {
- // bias is not used for type inference, but node itself should have a type
- bias->dtype(loco::DataType::FLOAT32);
-
- // bias is not used for shape inference
- }
-
const auto *options = op.builtin_options.AsFullyConnectedOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
node->weights_format(luci_weights_format(options->weights_format));
bool CircleGatherGraphBuilder::validate(const ValidateArgs &args) const
{
+ if (!GraphBuilder::validate(args, 2))
+ return false;
+
const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
const auto *options = args.op.builtin_options.AsGatherOptions();
int32_t axis = options->axis;
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
if (axis < 0)
axis += inputs.size();
bool CircleGatherNdGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
auto &indices_tensor = args.reader.tensors()[inputs.at(1)];
if (!(indices_tensor->type == circle::TensorType::TensorType_INT32 ||
{
LOGGER(l);
+ if (!GraphBuilder::validate(args, 2))
+ return false;
+
auto settings = luci::UserSettings::settings();
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
const auto &tensors = args.reader.tensors();
if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
bool CircleGreaterEqualGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
-
- if (outputs.size() != 1)
- {
- return false;
- }
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
* \- CircleIfOut --- Node ---
*/
-void CircleIfGraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
+CircleNode *CircleIfGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ uint32_t input_count = bna.op.inputs.size() - 1;
+ uint32_t output_count = bna.op.outputs.size();
- auto graph = context->graph();
+ auto *node = bna.context->graph()->nodes()->create<CircleIf>(input_count, output_count);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- uint32_t input_count = inputs.size() - 1;
- uint32_t output_count = outputs.size();
-
- // Create CircleIf
- CircleIf *node = graph->nodes()->create<CircleIf>(input_count, output_count);
-
- node->cond(input_nodes[0]);
+ node->cond(bna.input_nodes[0]);
for (uint32_t idx = 0; idx < input_count; ++idx)
{
- node->input(idx, input_nodes[idx + 1]);
+ node->input(idx, bna.input_nodes[idx + 1]);
}
- const auto *options = op.builtin_options.AsIfOptions();
+ const auto *options = bna.op.builtin_options.AsIfOptions();
node->then_branch(options->then_subgraph_index);
node->else_branch(options->else_subgraph_index);
- assert(outputs.size() > 0);
- {
- // Lets use name of output 0 as If name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for If itself but to virtual outputs
- }
-
- // Create virtual outputs of If
- for (uint32_t n = 0; n < output_count; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ return node;
+}
- auto *nodeout = graph->nodes()->create<CircleIfOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
+CircleNode *CircleIfGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleIfOut>();
- nodeout->input(node);
- nodeout->index(n);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
bool CircleInstanceNormGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
-
// TODO check dtypes
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleInstanceNormGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleL2NormalizeGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
- {
- return false;
- }
-
- if (outputs.size() != 1)
- {
- return false;
- }
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleL2NormalizeGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleL2Pool2DGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO check dtypes
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleL2Pool2DGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleLeakyReluGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleLeakyReluGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleLessGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
-
- if (outputs.size() != 1)
- {
- return false;
- }
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor = tensors.at(inputs.at(0));
bool CircleLessEqualGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
-
- if (outputs.size() != 1)
- {
- return false;
- }
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
bool CircleLocalResponseNormalizationGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleLocalResponseNormalizationGraphBuilder::build_node(
- const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
+ const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLocalResponseNormalization>();
node->input(inputs.at(0));
bool CircleLogGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
- if (args.op.outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
// input type check
// Must be one of bfloat16, half, float32, float64, complex64, complex128.
// Currently circle supports half(float16), float32, float64, complex64.
bool CircleLogSoftmaxGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleLogSoftmaxGraphBuilder::build_node(const circle::OperatorT &,
bool CircleLogicalAndGraphBuilder::validate(const ValidateArgs &args) const
{
- // Only BOOL type is allowed for inputs
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 2)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ // Only BOOL type is allowed for inputs
+ const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
for (auto input : inputs)
{
bool CircleLogicalNotGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
// Only BOOL type is allowed for the input
bool CircleLogicalOrGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
+ if (!GraphBuilder::validate(args, 2))
return false;
// Only BOOL type is allowed for inputs
bool CircleLogisticGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
- const auto &outputs = args.op.outputs;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
return false;
bool CircleMatrixDiagGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor = tensors.at(inputs.at(0));
bool CircleMatrixSetDiagGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor = tensors.at(inputs.at(0));
bool CircleMaxPool2DGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleMaxPool2DGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleMeanGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleMeanGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleMirrorPadGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
// TODO check others
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleMirrorPadGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleMulGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleMulGraphBuilder::build_node(const circle::OperatorT &op,
{
bool CircleNegGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO Support type check
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleNegGraphBuilder::build_node(const circle::OperatorT &,
* We will create multiple NonMaxSuppressionV4Out nodes to emulate this
*/
-void CircleNonMaxSuppressionV4GraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleNonMaxSuppressionV4GraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
-
- auto graph = context->graph();
-
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleNonMaxSuppressionV4
- auto node = graph->nodes()->create<CircleNonMaxSuppressionV4>();
- node->boxes(input_nodes[0]);
- node->scores(input_nodes[1]);
- node->max_output_size(input_nodes[2]);
- node->iou_threshold(input_nodes[3]);
- node->score_threshold(input_nodes[4]);
-
- assert(outputs.size() == 2);
- {
- // Let's use name of output 0 as NonMaxSuppressionV4 name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for NonMaxSuppressionV4 itself but to virtual outputs
- }
-
- // Create virtual outputs of NonMaxSuppressionV4
- for (size_t n = 0; n < outputs.size(); ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleNonMaxSuppressionV4Out>();
- copy_tensor_attributes(output_tensor, nodeout);
-
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ auto node = bna.context->graph()->nodes()->create<CircleNonMaxSuppressionV4>();
+
+ node->boxes(bna.input_nodes[0]);
+ node->scores(bna.input_nodes[1]);
+ node->max_output_size(bna.input_nodes[2]);
+ node->iou_threshold(bna.input_nodes[3]);
+ node->score_threshold(bna.input_nodes[4]);
+
+ return node;
+}
+
+CircleNode *CircleNonMaxSuppressionV4GraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleNonMaxSuppressionV4Out>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
}
} // namespace luci
* We will create multiple NonMaxSuppressionV5Out nodes to emulate this
*/
-void CircleNonMaxSuppressionV5GraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleNonMaxSuppressionV5GraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
-
- auto graph = context->graph();
-
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleNonMaxSuppressionV5
- auto node = graph->nodes()->create<CircleNonMaxSuppressionV5>();
- node->boxes(input_nodes[0]);
- node->scores(input_nodes[1]);
- node->max_output_size(input_nodes[2]);
- node->iou_threshold(input_nodes[3]);
- node->score_threshold(input_nodes[4]);
- node->soft_nms_sigma(input_nodes[5]);
-
- assert(outputs.size() == 3);
- {
- // Let's use name of output 0 as NonMaxSuppressionV5 name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for NonMaxSuppressionV5 itself but to virtual outputs
- }
-
- // Create virtual outputs of NonMaxSuppressionV5
- for (size_t n = 0; n < outputs.size(); ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleNonMaxSuppressionV5Out>();
- copy_tensor_attributes(output_tensor, nodeout);
-
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ auto node = bna.context->graph()->nodes()->create<CircleNonMaxSuppressionV5>();
+
+ node->boxes(bna.input_nodes[0]);
+ node->scores(bna.input_nodes[1]);
+ node->max_output_size(bna.input_nodes[2]);
+ node->iou_threshold(bna.input_nodes[3]);
+ node->score_threshold(bna.input_nodes[4]);
+ node->soft_nms_sigma(bna.input_nodes[5]);
+
+ return node;
+}
+
+CircleNode *CircleNonMaxSuppressionV5GraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleNonMaxSuppressionV5Out>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
}
} // namespace luci
bool CircleNotEqualGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
-
- if (outputs.size() != 1)
- {
- return false;
- }
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
bool CircleOneHotGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- const auto *options = args.op.builtin_options.AsOneHotOptions();
-
// Only 4 inputs are expected
- if (inputs.size() != 4)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 4))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto *options = args.op.builtin_options.AsOneHotOptions();
const auto &tensors = args.reader.tensors();
const auto &indices = tensors.at(inputs.at(0));
const auto &depth = tensors.at(inputs.at(1));
bool CirclePReluGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CirclePReluGraphBuilder::build_node(const circle::OperatorT &,
bool CirclePadGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CirclePadGraphBuilder::build_node(const circle::OperatorT &op,
bool CirclePadV2GraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CirclePadV2GraphBuilder::build_node(const circle::OperatorT &op,
bool CirclePowGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CirclePowGraphBuilder::build_node(const circle::OperatorT &,
{
bool CircleRangeGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
-
// TODO Support type check
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleRangeGraphBuilder::build_node(const circle::OperatorT &,
{
bool CircleRankGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleRankGraphBuilder::build_node(const circle::OperatorT &,
bool CircleReduceAnyGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_0 = tensors.at(inputs.at(0));
const auto &tensor_1 = tensors.at(inputs.at(1));
bool CircleReduceProdGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 2)
- return false;
- if (args.op.outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_1 = tensors.at(inputs.at(1));
bool CircleReluGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleReluGraphBuilder::build_node(const circle::OperatorT &,
bool CircleRelu6GraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleRelu6GraphBuilder::build_node(const circle::OperatorT &,
bool CircleReluN1To1GraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
// TODO check dtypes
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleReluN1To1GraphBuilder::build_node(const circle::OperatorT &,
if (args.op.outputs.size() != 1)
return false;
+ // for two inputs, check if type is S32
+ if (args.op.inputs.size() == 2)
+ {
+ const auto &inputs = args.op.inputs;
+ const auto &tensors = args.reader.tensors();
+ const auto &tensor_in = tensors.at(inputs.at(1));
+
+ // NOTE Fix this if any other case appears;
+ // TensorFlow Lite and Circle only support S32 here.
+ if (tensor_in->type != circle::TensorType::TensorType_INT32)
+ return false;
+ }
+
return true;
}
{
shape_node->at<loco::DataType::S32>(i) = shape[i];
}
+ shape_node->name("Reshape/shape");
return shape_node;
}
shape_node = graph->nodes()->create<CircleOutputDummy>();
shape_node->dtype(loco::DataType::S32);
shape_node->rank(0);
+ shape_node->name("Reshape/dummy");
}
}
#include "luci/Import/Nodes/CircleResizeBilinear.h"
-#include <luci/IR/Nodes/CircleConst.h>
#include <luci/IR/Nodes/CircleResizeBilinear.h>
namespace luci
bool CircleResizeBilinearGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleResizeBilinearGraphBuilder::build_node(const circle::OperatorT &op,
#include "luci/Import/Nodes/CircleResizeNearestNeighbor.h"
-#include <luci/IR/Nodes/CircleConst.h>
#include <luci/IR/Nodes/CircleResizeNearestNeighbor.h>
namespace luci
bool CircleResizeNearestNeighborGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleResizeNearestNeighborGraphBuilder::build_node(
- const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
+ const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleResizeNearestNeighbor>();
node->input(inputs.at(0));
bool CircleReverseSequenceGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_in = tensors.at(inputs.at(0));
const auto &tensor_lengths = tensors.at(inputs.at(1));
bool CircleReverseV2GraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_in = tensors.at(inputs.at(0));
const auto &tensor_axis = tensors.at(inputs.at(1));
bool CircleRoundGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
// Must be one of the following types
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
bool CircleRsqrtGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
// Must be one of the following types
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
+ case circle::TensorType_UINT8:
+ case circle::TensorType_INT16:
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
case circle::TensorType_COMPLEX64:
bool CircleScatterNdGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 3)
+ if (!GraphBuilder::validate(args, 3))
return false;
+ const auto &inputs = args.op.inputs;
// indices must have the same type as shape
const auto &tensors = args.reader.tensors();
bool CircleSegmentSumGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_in = tensors.at(inputs.at(0));
const auto &tensor_out = tensors.at(outputs[0]);
bool CircleSelectGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 3)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 3))
return false;
+ const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
const auto &tensor = tensors.at(inputs.at(0));
if (tensor->type != circle::TensorType_BOOL)
bool CircleSelectV2GraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 3)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 3))
return false;
+ const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
const auto &condition = tensors.at(inputs.at(0));
if (condition->type != circle::TensorType_BOOL)
bool CircleShapeGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
- return false;
-
// TODO check shape, dtype
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleShapeGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleSinGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
- if (args.op.outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
// input type check
const auto &tensors = args.reader.tensors();
const auto &tensor = tensors.at(inputs.at(0));
bool CircleSliceGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
- if (args.op.outputs.size() != 1)
- return false;
-
// TODO check shapes and types
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleSliceGraphBuilder::build_node(const circle::OperatorT &,
bool CircleSoftmaxGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleSoftmaxGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleSpaceToDepthGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleSpaceToDepthGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleSparseToDenseGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 4)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 4);
}
CircleNode *CircleSparseToDenseGraphBuilder::build_node(const circle::OperatorT &op,
* \- CircleSplitOut --- FullyConnected ---
*/
-void CircleSplitGraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
+CircleNode *CircleSplitGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ auto node = bna.context->graph()->nodes()->create<CircleSplit>();
- auto graph = context->graph();
+ node->split_dim(bna.input_nodes[0]);
+ node->input(bna.input_nodes[1]);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
+ const auto *options = bna.op.builtin_options.AsSplitOptions();
+ node->num_split(options->num_splits);
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
+ return node;
+}
- // Create CircleSplit
- auto node = graph->nodes()->create<CircleSplit>();
- node->split_dim(input_nodes[0]);
- node->input(input_nodes[1]);
+CircleNode *CircleSplitGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleSplitOut>();
- const auto *options = op.builtin_options.AsSplitOptions();
- node->num_split(options->num_splits);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- assert(outputs.size() > 0);
- assert(int32_t(outputs.size()) == options->num_splits);
- {
- // Let's use name of output 0 as Split name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for Split itself but to virtual outputs
- }
-
- // Create virtual outputs of Split
- for (int32_t n = 0; n < options->num_splits; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleSplitOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
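
The Split, SplitV, TopKV2, Unique and Unpack builders in this change now implement only build_node() and build_out(); the boilerplate they used to repeat (collecting input nodes, naming the node after output 0, copying tensor attributes, marking shape status, enrolling the virtual outputs) presumably moves into a shared multi-output base builder. A rough sketch of that driver, with the base-class name and the argument-struct construction as assumptions:

// Hypothetical sketch of the shared driver behind the build_node()/build_out()
// split. The class name (GraphBuilderMultiOutput here) and the argument-struct
// initialization are assumptions; the steps mirror the boilerplate removed
// from the per-operator builders.
CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op,
                                           GraphBuilderContext *context) const
{
  assert(context != nullptr);

  const auto &tensors = context->reader()->tensors();
  const auto &opcodes = context->reader()->opcodes();
  auto tensors_ptr = context->reader()->tensors_ptr();

  std::vector<CircleNode *> input_nodes;
  for (const int32_t input_tensor_index : op.inputs)
    input_nodes.push_back(context->nodefinder()->node(input_tensor_index));

  BuildNodeArgs bna{op, context, input_nodes};
  auto *node = build_node(bna);

  // Use the name of output 0 for the "mother" node; quantization stays on the virtual outputs.
  const auto &outputs = op.outputs;
  node->name(tensor_name(*tensors[outputs[0]]));
  node->op_version(opcodes[op.opcode_index].get()->version);

  // Create one virtual output per model output and enroll it with the node finder.
  for (uint32_t n = 0; n < outputs.size(); ++n)
  {
    BuildOutArgs boa{node, n};
    auto *nodeout = build_out(boa);

    copy_tensor_attributes(*tensors[outputs[n]], nodeout);
    nodeout->shape_status(tensors_ptr->Get(outputs[n])->shape() == nullptr ? ShapeStatus::NOSHAPE
                                                                           : ShapeStatus::VALID);
    context->nodefinder()->enroll(outputs[n], nodeout);
  }
  return node;
}
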
* \- CircleSplitVOut --- FullyConnected ---
*/
-void CircleSplitVGraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleSplitVGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
-
- auto graph = context->graph();
-
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleSplitV
- auto node = graph->nodes()->create<CircleSplitV>();
- node->input(input_nodes[0]);
- node->size_splits(input_nodes[1]);
- node->split_dim(input_nodes[2]);
-
- const auto *options = op.builtin_options.AsSplitVOptions();
+ auto node = bna.context->graph()->nodes()->create<CircleSplitV>();
+
+ node->input(bna.input_nodes[0]);
+ node->size_splits(bna.input_nodes[1]);
+ node->split_dim(bna.input_nodes[2]);
+
+ const auto *options = bna.op.builtin_options.AsSplitVOptions();
node->num_split(options->num_splits);
- assert(outputs.size() > 0);
- assert(int32_t(outputs.size()) == options->num_splits);
- {
- // Let's use name of output 0 as Split name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for Split itself but to virtual outputs
- }
-
- // Create virtual outputs of Split
- for (int32_t n = 0; n < options->num_splits; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleSplitVOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ assert(int32_t(bna.op.outputs.size()) == options->num_splits);
+
+ return node;
+}
+
+CircleNode *CircleSplitVGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleSplitVOut>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
}
} // namespace luci
bool CircleSqrtGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleSqrtGraphBuilder::build_node(const circle::OperatorT &,
bool CircleSquareGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
// Must be one of the following types
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
bool CircleSquaredDifferenceGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
// Inputs must be one of the following types
// bfloat16, half(float16), float32, float64, int32, int64, complex64, complex128
const auto &tensors = args.reader.tensors();
#include "luci/Import/Nodes/CircleSqueeze.h"
-#include <luci/IR/Nodes/CircleConst.h>
#include <luci/IR/Nodes/CircleSqueeze.h>
namespace luci
bool CircleSqueezeGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleSqueezeGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleStridedSliceGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 4)
- return false;
- if (args.op.outputs.size() != 1)
- return false;
-
// TODO check shapes and types
-
- return true;
+ return GraphBuilder::validate(args, 4);
}
CircleNode *CircleStridedSliceGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleSubGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleSubGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleSumGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleSumGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleTanhGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
- const auto &outputs = args.op.outputs;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
return false;
bool CircleTileGraphBuilder::validate(const ValidateArgs &args) const
{
- auto inputs = args.op.inputs;
- auto outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ auto inputs = args.op.inputs;
+ auto outputs = args.op.outputs;
// Multiples (inputs.at(1)) must be one of the following types
// int32, int64
const auto &tensors = args.reader.tensors();
* \- CircleTopKV2Out --- FullyConnected ---
*/
-void CircleTopKV2GraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleTopKV2GraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
-
- auto graph = context->graph();
-
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleTopKV2
- auto node = graph->nodes()->create<CircleTopKV2>();
- node->input(input_nodes[0]);
- node->k(input_nodes[1]);
-
- assert(outputs.size() == 2);
- {
- // Let's use name of output 0 as TopKV2 name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for TopKV2 itself but to virtual outputs
- }
-
- // Create virtual outputs of TopKV2
- for (size_t n = 0; n < outputs.size(); ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleTopKV2Out>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ auto node = bna.context->graph()->nodes()->create<CircleTopKV2>();
+
+ node->input(bna.input_nodes[0]);
+ node->k(bna.input_nodes[1]);
+
+ return node;
+}
+
+CircleNode *CircleTopKV2GraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleTopKV2Out>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
}
} // namespace luci
bool CircleTransposeGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleTransposeGraphBuilder::build_node(const circle::OperatorT &op,
node->filter(inputs.at(1));
node->outBackprop(inputs.at(2));
if (inputs.size() == 3)
- node->bias(graph->nodes()->create<CircleOutputExclude>());
- else
- node->bias(inputs.at(3));
-
- if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
{
- // CircleOutputExclude doesn't need a type, but since all nodes must have a type, a dummy type
- // is inserted.
+ auto *bias = graph->nodes()->create<CircleOutputExclude>();
+ // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
+ // a dummy type is inserted.
bias->dtype(loco::DataType::FLOAT32);
+ node->bias(bias);
}
+ else
+ node->bias(inputs.at(3));
const auto *options = op.builtin_options.AsTransposeConvOptions();
node->padding(luci_padding(options->padding));
bool CircleUnidirectionalSequenceLSTMGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 24)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 24);
}
CircleNode *CircleUnidirectionalSequenceLSTMGraphBuilder::build_node(
- const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
+ const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleUnidirectionalSequenceLSTM>();
node->input(inputs.at(0));
node->forget_layer_norm_coefficients(inputs.at(21)); // Optional
node->cell_layer_norm_coefficients(inputs.at(22)); // Optional
node->output_layer_norm_coefficients(inputs.at(23)); // Optional
- const std::vector<int32_t> optionals = {1, 5, 9, 10, 11, 12, 16, 17, 20, 21, 22, 23};
- for (auto optional : optionals)
- {
- if (auto inp = dynamic_cast<luci::CircleOutputExclude *>(node->arg(optional)))
- {
- // CircleOutputExclude doesn't need a type, but since all nodes must have a type, a dummy type
- // is inserted.
- inp->dtype(loco::DataType::FLOAT32);
- }
- }
const auto *options = op.builtin_options.AsUnidirectionalSequenceLSTMOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
return true;
}
-void CircleUniqueGraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleUniqueGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ auto node = bna.context->graph()->nodes()->create<CircleUnique>();
- auto graph = context->graph();
+ node->input(bna.input_nodes[0]);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
+ const auto *options = bna.op.builtin_options.AsUniqueOptions();
+ node->idx_out_type(luci_datatype(options->idx_out_type));
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleUnique
- auto node = graph->nodes()->create<CircleUnique>();
- node->input(input_nodes[0]);
-
- const auto *options = op.builtin_options.AsUniqueOptions();
- node->output_type(luci_datatype(options->idx_out_type));
-
- assert(int32_t(outputs.size()) == 2);
- // Let's use name of output 0 as Unique name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
-
- // Create virtual outputs of Unique
- for (int32_t n = 0; n < 2; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ return node;
+}
- auto *nodeout = graph->nodes()->create<CircleUniqueOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
+CircleNode *CircleUniqueGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleUniqueOut>();
- nodeout->input(node);
- nodeout->index(n);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
* \- CircleUnpackOut --- FullyConnected ---
*/
-void CircleUnpackGraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleUnpackGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ auto node = bna.context->graph()->nodes()->create<CircleUnpack>();
- auto graph = context->graph();
+ node->value(bna.input_nodes[0]);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- // NOTE Unpack has only one input so running a loop is not necessary
- // This is provided as a reference for other Ops as a reference
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleUnpack
- CircleUnpack *node = graph->nodes()->create<CircleUnpack>();
- node->value(input_nodes[0]);
-
- const auto *options = op.builtin_options.AsUnpackOptions();
+ const auto *options = bna.op.builtin_options.AsUnpackOptions();
node->num(options->num);
node->axis(options->axis);
- assert(outputs.size() > 0);
- {
- // Let's use name of output 0 as Unpack name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for Unpack itself but to virtual outputs
- }
-
- // Create virtual outputs of Unpack
- for (int32_t n = 0; n < options->num; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ return node;
+}
- auto *nodeout = graph->nodes()->create<CircleUnpackOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
+CircleNode *CircleUnpackGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleUnpackOut>();
- nodeout->input(node);
- nodeout->index(n);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
bool CircleWhereGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_condition = tensors.at(inputs.at(0));
const auto &tensor_out = tensors.at(outputs[0]);
* \- CircleWhileOut --- Node ---
*/
-void CircleWhileGraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
+CircleNode *CircleWhileGraphBuilder::build(const circle::OperatorT &op,
+ GraphBuilderContext *context) const
{
assert(context != nullptr);
context->nodefinder()->enroll(outputs[n], nodeout);
}
+
+ return node;
}
} // namespace luci
bool CircleZerosLikeGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleZerosLikeGraphBuilder::build_node(const circle::OperatorT &,
namespace
{
/**
- * @brief ValidateNodeProp will validate inter graph connections for each Nodes
+ * @brief ValidateNodeProp validates inter-graph connections for each node.
+ * @note Only loco::GraphInput and loco::GraphOutput are validated here,
+ *       since this class checks inter-graph connections.
+ *       CircleNodes such as CircleInput and CircleOutput are validated in later steps.
*/
class ValidateNodeProp final : public luci::CircleNodeMutableVisitor<void>
{
auto then_graph_output = then_graph_outputs->at(then_out->index());
auto else_graph_output = else_graph_outputs->at(else_out->index());
- if (!(*then_graph_output->shape() == *else_graph_output->shape()))
+ if (then_graph_output->shape()->rank() != else_graph_output->shape()->rank())
{
- INTERNAL_EXN_V("CircleIf THEN and ELSE Graph Output shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleIf THEN and ELSE Graph Output rank mismatch ", idx);
+ }
+ for (uint32_t i = 0; i < then_graph_output->shape()->rank(); ++i)
+ {
+ if (then_graph_output->shape()->dim(i).known() &&
+ else_graph_output->shape()->dim(i).known() &&
+ then_graph_output->shape()->dim(i).value() !=
+ else_graph_output->shape()->dim(i).value())
+ {
+ INTERNAL_EXN_V("CircleIf THEN and ELSE Graph Output dimension mismatch ", idx);
+ }
}
if (then_graph_output->dtype() != else_graph_output->dtype())
{
auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
auto body_graph_input = body_graph_inputs->at(body_in->index());
- if ((cond_in->rank() != body_in->rank()))
+ if (cond_graph_input->shape()->rank() != body_graph_input->shape()->rank())
{
- INTERNAL_EXN_V("CircleWhile COND input and BODY input shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleWhile COND input and BODY input rank mismatch ", idx);
}
- if (cond_in->rank() > 0 && body_in->rank() > 0)
+ for (uint32_t i = 0; i < cond_graph_input->shape()->rank(); ++i)
{
- if (!(*cond_graph_input->shape() == *body_graph_input->shape()))
+ if (cond_graph_input->shape()->dim(i).known() &&
+ body_graph_input->shape()->dim(i).known() &&
+ cond_graph_input->shape()->dim(i).value() != body_graph_input->shape()->dim(i).value())
{
- INTERNAL_EXN_V("CircleWhile COND input and BODY input shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleWhile COND input and BODY input dimension mismatch ", idx);
}
}
- if (cond_in->dtype() != body_in->dtype())
+ if (cond_graph_input->dtype() != body_graph_input->dtype())
{
INTERNAL_EXN_V("CircleWhile COND input and BODY input type mismatch ", idx);
}
auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
auto body_graph_output = body_graph_outputs->at(body_out->index());
- if ((cond_in->rank() != body_out->rank()))
+ if (cond_graph_input->shape()->rank() != body_graph_output->shape()->rank())
{
- INTERNAL_EXN_V("CircleWhile COND input and BODY output shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleWhile COND input and BODY output rank mismatch ", idx);
}
- if (cond_in->rank() > 0 && body_out->rank() > 0)
+ for (uint32_t i = 0; i < cond_graph_input->shape()->rank(); ++i)
{
- if (!(*cond_graph_input->shape() == *body_graph_output->shape()))
+ if (cond_graph_input->shape()->dim(i).known() &&
+ body_graph_output->shape()->dim(i).known() &&
+ cond_graph_input->shape()->dim(i).value() != body_graph_output->shape()->dim(i).value())
{
- INTERNAL_EXN_V("CircleWhile COND input and BODY output shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleWhile COND input and BODY output dimension mismatch ", idx);
}
}
- if (cond_in->dtype() != body_out->dtype())
+ if (cond_graph_input->dtype() != body_graph_output->dtype())
{
INTERNAL_EXN_V("CircleWhile COND input and BODY output type mismatch ", idx);
}
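
The CircleIf and CircleWhile checks above replace a strict shape equality with a rank check plus a per-dimension comparison that skips unknown dimensions. A small hypothetical helper (not part of this change) condenses the pattern:

// Hypothetical helper: shapes are compatible when ranks match and every pair
// of *known* dimensions agrees; unknown (dynamic) dimensions act as wildcards.
bool compatible_shapes(const loco::TensorShape &a, const loco::TensorShape &b)
{
  if (a.rank() != b.rank())
    return false;
  for (uint32_t i = 0; i < a.rank(); ++i)
  {
    if (a.dim(i).known() && b.dim(i).known() && a.dim(i).value() != b.dim(i).value())
      return false;
  }
  return true;
}
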
target_include_directories(luci_lang PUBLIC include)
target_link_libraries(luci_lang PUBLIC loco)
target_link_libraries(luci_lang PUBLIC oops)
+target_link_libraries(luci_lang PUBLIC nncc_coverage)
target_link_libraries(luci_lang PRIVATE logo)
target_link_libraries(luci_lang PRIVATE nncc_common)
#include <loco/IR/Dialect.h>
#include <loco/IR/Node.h>
#include <loco/IR/NodeMixins.h>
-#include <luci/IR/CircleShapeSignature.h>
#include <luci/IR/PropertyShapeStatus.h>
#include "CircleOpcode.h"
_sparsityparam = std::move(sparsityparam);
}
- const ShapeSignature &shape_signature(void) const { return _shape_signature; }
- void shape_signature(const ShapeSignature &ss) { _shape_signature = ss; }
-
ShapeStatus shape_status(void) const { return _shape_status; }
void shape_status(ShapeStatus ss) { _shape_status = ss; }
NodeName _name;
std::unique_ptr<CircleQuantParam> _quantparam;
std::unique_ptr<SparsityParam> _sparsityparam;
- ShapeSignature _shape_signature;
ShapeStatus _shape_status{ShapeStatus::UNDEFINED};
int32_t _op_version = 1;
};
\
case CircleOpcode::OPCODE: \
return v->visit(dynamic_cast<const CLASS *>(this));
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
default:
\
case CircleOpcode::OPCODE: \
return v->visit(dynamic_cast<CLASS *>(this));
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
default:
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NODE_MIXINS_H__
+#define __LUCI_IR_CIRCLE_NODE_MIXINS_H__
+
+#include "luci/IR/AttrFusedActFunc.h"
+
+#include <loco/IR/Node.h>
+#include <loco/IR/NodeMixins.h>
+
+#include <vector>
+
+namespace luci
+{
+
+/// @brief Enumeration of mixin classes
+enum class CircleNodeTrait
+{
+ FusedActFunc,
+ Bias
+};
+
+template <CircleNodeTrait T> class CircleNodeMixin;
+
+template <> class CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+{
+public:
+ CircleNodeMixin() = default;
+
+public:
+ FusedActFunc fusedActivationFunction() const { return _fused_act_fun; }
+ void fusedActivationFunction(FusedActFunc fused_act_fun) { _fused_act_fun = fused_act_fun; }
+
+private:
+ FusedActFunc _fused_act_fun = FusedActFunc::UNDEFINED;
+};
+
+/**
+ * @brief Mixin class for nodes that have a bias input
+ */
+template <> class CircleNodeMixin<CircleNodeTrait::Bias>
+{
+public:
+ CircleNodeMixin() = default;
+
+public:
+  virtual loco::Node *bias(void) const = 0; ///< @brief Get the input for bias.
+  virtual void bias(loco::Node *node) = 0;  ///< @brief Set the input for bias.
+};
+
+/**
+ * @brief Nodes with a fixed number of inputs
+ *
+ * TODO Deprecate this class and use loco::FixedArity instead
+ */
+template <unsigned N, typename Base> class FixedArityNode : public Base
+{
+public:
+ FixedArityNode()
+ {
+ _args.resize(N);
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _args[n] = std::make_unique<loco::Use>(this);
+ }
+ }
+
+ virtual ~FixedArityNode() = default;
+
+public:
+ unsigned arity(void) const final { return N; }
+
+ loco::Node *arg(uint32_t n) const final { return _args.at(n)->node(); }
+
+ void drop(void) final
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _args.at(n)->node(nullptr);
+ }
+ }
+
+protected:
+ // This API allows inherited classes to access "_args" field.
+ loco::Use *at(unsigned n) const { return _args.at(n).get(); }
+
+private:
+ std::vector<std::unique_ptr<loco::Use>> _args{};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NODE_MIXINS_H__
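
For reference, this is how a concrete node composes the pieces declared above: FixedArityNode fixes the input count and the FusedActFunc mixin contributes the activation attribute. The snippet abridges CircleAdd, which this change updates further down.

// Abridged from CircleAdd (see the Nodes/CircleAdd.h hunk later in this diff).
class CircleAdd final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::ADD>>,
                        public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
  loco::Node *x(void) const { return at(0)->node(); }
  void x(loco::Node *node) { at(0)->node(node); }

  loco::Node *y(void) const { return at(1)->node(); }
  void y(loco::Node *node) { at(1)->node(node); }
};
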
virtual ~CircleNodeVisitorBase() = default;
#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) virtual T visit(const CIRCLE_CLASS *) = 0;
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
};
#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) \
virtual T visit(const CIRCLE_CLASS *node) { return visit(static_cast<const CircleNode *>(node)); }
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
/// @brief Default fallback
virtual ~CircleNodeMutableVisitorBase() = default;
#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) virtual T visit(CIRCLE_CLASS *) = 0;
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
};
#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) \
virtual T visit(CIRCLE_CLASS *node) { return visit(static_cast<CircleNode *>(node)); }
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
/// @brief Default fallback
#include "Nodes/CircleAveragePool2D.h"
#include "Nodes/CircleBatchMatMul.h"
#include "Nodes/CircleBatchToSpaceND.h"
+#include "Nodes/CircleBidirectionalSequenceLSTM.h"
#include "Nodes/CircleCast.h"
#include "Nodes/CircleCeil.h"
#include "Nodes/CircleConcatenation.h"
#include "Nodes/CircleEqual.h"
#include "Nodes/CircleExp.h"
#include "Nodes/CircleExpandDims.h"
+#include "Nodes/CircleFakeQuant.h"
#include "Nodes/CircleFill.h"
#include "Nodes/CircleFloor.h"
#include "Nodes/CircleFloorDiv.h"
// Virtual nodes
#include "Nodes/CircleInput.h"
#include "Nodes/CircleOutput.h"
+#include "Nodes/CircleBidirectionalSequenceLSTMOut.h"
#include "Nodes/CircleCustomOut.h"
#include "Nodes/CircleIfOut.h"
#include "Nodes/CircleNonMaxSuppressionV4Out.h"
namespace luci
{
-/**
- * @brief Set both CircleReshape's 2nd input as CircleConst, and newShape attribute
- * with same value
- * @note Shape inference for TFLReshape forces them to be same
- *
- * TODO find better place for this helper
- */
-void set_new_shape(CircleReshape *node, int32_t *base, uint32_t size);
-
/// @brief Link GraphOutput with CircleOutput node
void link(loco::GraphOutput *, CircleOutput *);
#error "Define CIRCLE_NODE"
#endif // CIRCLE_NODE
+#ifndef CIRCLE_VNODE
+#error "Define CIRCLE_VNODE"
+#endif // CIRCLE_VNODE
+
//
// PLEASE SORT NODE DECLS IN ALPHABETICAL ORDER
//
CIRCLE_NODE(ARG_MIN, luci::CircleArgMin)
CIRCLE_NODE(AVERAGE_POOL_2D, luci::CircleAveragePool2D)
CIRCLE_NODE(BATCH_TO_SPACE_ND, luci::CircleBatchToSpaceND)
-CIRCLE_NODE(BATCHMATMUL, luci::CircleBatchMatMul)
+CIRCLE_NODE(BATCH_MATMUL, luci::CircleBatchMatMul)
+CIRCLE_NODE(BIDIRECTIONAL_SEQUENCE_LSTM, luci::CircleBidirectionalSequenceLSTM)
CIRCLE_NODE(CAST, luci::CircleCast)
CIRCLE_NODE(CEIL, luci::CircleCeil)
CIRCLE_NODE(CONCATENATION, luci::CircleConcatenation)
CIRCLE_NODE(EQUAL, luci::CircleEqual)
CIRCLE_NODE(EXP, luci::CircleExp)
CIRCLE_NODE(EXPAND_DIMS, luci::CircleExpandDims)
+CIRCLE_NODE(FAKE_QUANT, luci::CircleFakeQuant)
CIRCLE_NODE(FILL, luci::CircleFill)
CIRCLE_NODE(FLOOR, luci::CircleFloor)
CIRCLE_NODE(FLOOR_DIV, luci::CircleFloorDiv)
CIRCLE_NODE(BCQ_GATHER, luci::CircleBCQGather)
CIRCLE_NODE(INSTANCE_NORM, luci::CircleInstanceNorm)
// Virtual node(s)
-CIRCLE_NODE(CIRCLECONST, luci::CircleConst)
-CIRCLE_NODE(CIRCLEINPUT, luci::CircleInput)
-CIRCLE_NODE(CIRCLEOUTPUT, luci::CircleOutput)
-CIRCLE_NODE(CIRCLEOUTPUTDUMMY, luci::CircleOutputDummy)
-CIRCLE_NODE(CIRCLEOUTPUTEXCLUDE, luci::CircleOutputExclude)
-CIRCLE_NODE(CIRCLECUSTOMOUT, luci::CircleCustomOut)
-CIRCLE_NODE(CIRCLEIFOUT, luci::CircleIfOut)
-CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, luci::CircleNonMaxSuppressionV4Out)
-CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV5OUT, luci::CircleNonMaxSuppressionV5Out)
-CIRCLE_NODE(CIRCLESPLITOUT, luci::CircleSplitOut)
-CIRCLE_NODE(CIRCLESPLITVOUT, luci::CircleSplitVOut)
-CIRCLE_NODE(CIRCLETOPKV2OUT, luci::CircleTopKV2Out)
-CIRCLE_NODE(CIRCLEUNIQUEOUT, luci::CircleUniqueOut)
-CIRCLE_NODE(CIRCLEUNPACKOUT, luci::CircleUnpackOut)
-CIRCLE_NODE(CIRCLEWHILEOUT, luci::CircleWhileOut)
+CIRCLE_VNODE(CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT, luci::CircleBidirectionalSequenceLSTMOut)
+CIRCLE_VNODE(CIRCLECONST, luci::CircleConst)
+CIRCLE_VNODE(CIRCLEINPUT, luci::CircleInput)
+CIRCLE_VNODE(CIRCLEOUTPUT, luci::CircleOutput)
+CIRCLE_VNODE(CIRCLEOUTPUTDUMMY, luci::CircleOutputDummy)
+CIRCLE_VNODE(CIRCLEOUTPUTEXCLUDE, luci::CircleOutputExclude)
+CIRCLE_VNODE(CIRCLECUSTOMOUT, luci::CircleCustomOut)
+CIRCLE_VNODE(CIRCLEIFOUT, luci::CircleIfOut)
+CIRCLE_VNODE(CIRCLENONMAXSUPPRESSIONV4OUT, luci::CircleNonMaxSuppressionV4Out)
+CIRCLE_VNODE(CIRCLENONMAXSUPPRESSIONV5OUT, luci::CircleNonMaxSuppressionV5Out)
+CIRCLE_VNODE(CIRCLESPLITOUT, luci::CircleSplitOut)
+CIRCLE_VNODE(CIRCLESPLITVOUT, luci::CircleSplitVOut)
+CIRCLE_VNODE(CIRCLETOPKV2OUT, luci::CircleTopKV2Out)
+CIRCLE_VNODE(CIRCLEUNIQUEOUT, luci::CircleUniqueOut)
+CIRCLE_VNODE(CIRCLEUNPACKOUT, luci::CircleUnpackOut)
+CIRCLE_VNODE(CIRCLEWHILEOUT, luci::CircleWhileOut)
enum class CircleOpcode
{
#define CIRCLE_NODE(OPCODE, CLASS) OPCODE,
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
};
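
CircleNodes.lst now guards both macros, so every consumer must define CIRCLE_VNODE before including it; defining it as CIRCLE_NODE, as in the hunks above, preserves the old single-macro expansion. A hypothetical consumer that treats virtual nodes differently could look like this:

// Hypothetical X-macro consumer: count only the virtual node opcodes.
// The #error guard added to CircleNodes.lst forces both macros to be defined.
uint32_t count_virtual_opcodes(void)
{
  uint32_t count = 0;
#define CIRCLE_NODE(OPCODE, CLASS) // real operators are ignored here
#define CIRCLE_VNODE(OPCODE, CLASS) ++count;
#include "CircleNodes.lst"
#undef CIRCLE_VNODE
#undef CIRCLE_NODE
  return count;
}
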
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_IR_SHAPE_SIGNATURE_H__
-#define __LUCI_IR_SHAPE_SIGNATURE_H__
-
-#include <stdint.h>
-#include <vector>
-
-namespace luci
-{
-
-class ShapeSignature
-{
-public:
- ShapeSignature() = default;
-
- ShapeSignature(const std::vector<int32_t> &shape_signature)
- {
- _shape_signature = shape_signature;
- }
-
-public:
- const std::vector<int32_t> &as_vector() const { return _shape_signature; }
-
- int32_t dim(uint32_t d) const { return _shape_signature.at(d); }
- int32_t &dim(uint32_t d) { return _shape_signature.at(d); }
-
- uint32_t rank(void) const { return _shape_signature.size(); }
- void rank(uint32_t rank) { _shape_signature.resize(rank); }
-
-private:
- std::vector<int32_t> _shape_signature{};
-};
-
-bool operator==(const ShapeSignature &lhs, const ShapeSignature &rhs);
-
-} // namespace luci
-
-#endif // __LUCI_IR_SHAPE_SIGNATURE_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_LANG_DEADNODEQUERYSERVICE_H__
+#define __LUCI_LANG_DEADNODEQUERYSERVICE_H__
+
+#include <logo/DeadNodeQueryService.h>
+
+#include <loco/IR/Node.h>
+
+namespace luci
+{
+
+struct DeadNodeQueryServiceImpl final : public logo::DeadNodeQueryService
+{
+ bool isDeadNode(loco::Node *node) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_LANG_DEADNODEQUERYSERVICE_H__
#ifndef __LUCI_IR_LUCINODEMIXINS_H__
#define __LUCI_IR_LUCINODEMIXINS_H__
-#include "luci/IR/AttrFusedActFunc.h"
+// TODO remove this file after LuciNodeTrait and LuciNodeMixin are not used in backend
-#include <loco/IR/Node.h>
-#include <loco/IR/NodeMixins.h>
-
-#include <vector>
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
-/// @brief enumeration of mixin class
-enum class LuciNodeTrait
-{
- FusedActFunc,
- Bias
-};
-
-template <LuciNodeTrait T> class LuciNodeMixin;
-
-template <> class LuciNodeMixin<LuciNodeTrait::FusedActFunc>
-{
-public:
- LuciNodeMixin() = default;
-
-public:
- FusedActFunc fusedActivationFunction() const { return _fused_act_fun; }
- void fusedActivationFunction(FusedActFunc fused_act_fun) { _fused_act_fun = fused_act_fun; }
-
-private:
- FusedActFunc _fused_act_fun = FusedActFunc::UNDEFINED;
-};
-
-/**
- * @brief Mixin class for nodes that has a bias input
- */
-template <> class LuciNodeMixin<LuciNodeTrait::Bias>
-{
-public:
- LuciNodeMixin() = default;
-
-public:
- virtual loco::Node *bias(void) const = 0; /// @brief get the input for bias.
- virtual void bias(loco::Node *node) = 0; /// @brief set the input for bias.
-};
-
-/**
- * @brief Nodes with the fixed number of inputs
- *
- * TODO Deprecated this class, and use loco::FixedArity instead
- */
-template <unsigned N, typename Base> class FixedArityNode : public Base
-{
-public:
- FixedArityNode()
- {
- _args.resize(N);
- for (uint32_t n = 0; n < N; ++n)
- {
- _args[n] = std::make_unique<loco::Use>(this);
- }
- }
-
- virtual ~FixedArityNode() = default;
-
-public:
- unsigned arity(void) const final { return N; }
-
- loco::Node *arg(uint32_t n) const final { return _args.at(n)->node(); }
-
- void drop(void) final
- {
- for (uint32_t n = 0; n < N; ++n)
- {
- _args.at(n)->node(nullptr);
- }
- }
-
-protected:
- // This API allows inherited classes to access "_args" field.
- loco::Use *at(unsigned n) const { return _args.at(n).get(); }
+using LuciNodeTrait = CircleNodeTrait;
-private:
- std::vector<std::unique_ptr<loco::Use>> _args{};
-};
+template <LuciNodeTrait T> using LuciNodeMixin = CircleNodeMixin<T>;
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief ADD in Circle
*/
class CircleAdd final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::ADD>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief AVERAGE_POOL_2D in Circle
*/
class CircleAveragePool2D final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::AVERAGE_POOL_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::AVERAGE_POOL_2D>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
-public:
- CircleAveragePool2D() : _padding(Padding::UNDEFINED) { /* empty */}
-
public:
loco::Node *value(void) const { return at(0)->node(); }
void value(loco::Node *node) { at(0)->node(node); }
+public:
Padding padding() const { return _padding; }
void padding(Padding padding) { _padding = padding; }
Stride *stride(void) { return &_stride; }
private:
- Padding _padding;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
Filter _filter;
};
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief BCQ_FULLY_CONNECTED in Circle
*/
class CircleBCQFullyConnected final
- : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::BCQ_FULLY_CONNECTED>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::BCQ_FULLY_CONNECTED>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
}
private:
- int32_t _weights_hidden_size = 0;
+ int32_t _weights_hidden_size{0};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void input_hidden_size(int32_t input_hidden_size) { _input_hidden_size = input_hidden_size; }
private:
- int32_t _axis = 0;
- int32_t _input_hidden_size = 0;
+ int32_t _axis{0};
+ int32_t _input_hidden_size{0};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
/**
- * @brief BATCHMATMUL in Circle
+ * @brief BATCH_MATMUL in Circle
*/
-class CircleBatchMatMul final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::BATCHMATMUL>>
+class CircleBatchMatMul final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::BATCH_MATMUL>>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
void adj_y(bool arg) { _adj_y = arg; }
private:
- bool _adj_x = false;
- bool _adj_y = false;
+ bool _adj_x{false};
+ bool _adj_y{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief BATCH_TO_SPACE_ND in Circle
*/
class CircleBatchToSpaceND final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::BATCH_TO_SPACE_ND>>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::BATCH_TO_SPACE_ND>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEBIDIRECTIONALSEQUENCE_LSTM_H__
+#define __LUCI_IR_CIRCLEBIDIRECTIONALSEQUENCE_LSTM_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief BIDIRECTIONAL_SEQUENCE_LSTM in Circle
+ */
+class CircleBidirectionalSequenceLSTM final
+ : public FixedArityNode<48, CircleNodeImpl<CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *fw_input_to_input_weights(void) const { return at(1)->node(); }
+ void fw_input_to_input_weights(loco::Node *node) { at(1)->node(node); }
+ loco::Node *fw_input_to_forget_weights(void) const { return at(2)->node(); }
+ void fw_input_to_forget_weights(loco::Node *node) { at(2)->node(node); }
+ loco::Node *fw_input_to_cell_weights(void) const { return at(3)->node(); }
+ void fw_input_to_cell_weights(loco::Node *node) { at(3)->node(node); }
+ loco::Node *fw_input_to_output_weights(void) const { return at(4)->node(); }
+ void fw_input_to_output_weights(loco::Node *node) { at(4)->node(node); }
+
+ loco::Node *fw_recurrent_to_input_weights(void) const { return at(5)->node(); }
+ void fw_recurrent_to_input_weights(loco::Node *node) { at(5)->node(node); }
+ loco::Node *fw_recurrent_to_forget_weights(void) const { return at(6)->node(); }
+ void fw_recurrent_to_forget_weights(loco::Node *node) { at(6)->node(node); }
+ loco::Node *fw_recurrent_to_cell_weights(void) const { return at(7)->node(); }
+ void fw_recurrent_to_cell_weights(loco::Node *node) { at(7)->node(node); }
+ loco::Node *fw_recurrent_to_output_weights(void) const { return at(8)->node(); }
+ void fw_recurrent_to_output_weights(loco::Node *node) { at(8)->node(node); }
+
+ loco::Node *fw_cell_to_input_weights(void) const { return at(9)->node(); }
+ void fw_cell_to_input_weights(loco::Node *node) { at(9)->node(node); }
+ loco::Node *fw_cell_to_forget_weights(void) const { return at(10)->node(); }
+ void fw_cell_to_forget_weights(loco::Node *node) { at(10)->node(node); }
+ loco::Node *fw_cell_to_output_weights(void) const { return at(11)->node(); }
+ void fw_cell_to_output_weights(loco::Node *node) { at(11)->node(node); }
+
+ loco::Node *fw_input_gate_bias(void) const { return at(12)->node(); }
+ void fw_input_gate_bias(loco::Node *node) { at(12)->node(node); }
+ loco::Node *fw_forget_gate_bias(void) const { return at(13)->node(); }
+ void fw_forget_gate_bias(loco::Node *node) { at(13)->node(node); }
+ loco::Node *fw_cell_gate_bias(void) const { return at(14)->node(); }
+ void fw_cell_gate_bias(loco::Node *node) { at(14)->node(node); }
+ loco::Node *fw_output_gate_bias(void) const { return at(15)->node(); }
+ void fw_output_gate_bias(loco::Node *node) { at(15)->node(node); }
+
+ loco::Node *fw_projection_weights(void) const { return at(16)->node(); }
+ void fw_projection_weights(loco::Node *node) { at(16)->node(node); }
+ loco::Node *fw_projection_bias(void) const { return at(17)->node(); }
+ void fw_projection_bias(loco::Node *node) { at(17)->node(node); }
+
+ loco::Node *bw_input_to_input_weights(void) const { return at(18)->node(); }
+ void bw_input_to_input_weights(loco::Node *node) { at(18)->node(node); }
+ loco::Node *bw_input_to_forget_weights(void) const { return at(19)->node(); }
+ void bw_input_to_forget_weights(loco::Node *node) { at(19)->node(node); }
+ loco::Node *bw_input_to_cell_weights(void) const { return at(20)->node(); }
+ void bw_input_to_cell_weights(loco::Node *node) { at(20)->node(node); }
+ loco::Node *bw_input_to_output_weights(void) const { return at(21)->node(); }
+ void bw_input_to_output_weights(loco::Node *node) { at(21)->node(node); }
+
+ loco::Node *bw_recurrent_to_input_weights(void) const { return at(22)->node(); }
+ void bw_recurrent_to_input_weights(loco::Node *node) { at(22)->node(node); }
+ loco::Node *bw_recurrent_to_forget_weights(void) const { return at(23)->node(); }
+ void bw_recurrent_to_forget_weights(loco::Node *node) { at(23)->node(node); }
+ loco::Node *bw_recurrent_to_cell_weights(void) const { return at(24)->node(); }
+ void bw_recurrent_to_cell_weights(loco::Node *node) { at(24)->node(node); }
+ loco::Node *bw_recurrent_to_output_weights(void) const { return at(25)->node(); }
+ void bw_recurrent_to_output_weights(loco::Node *node) { at(25)->node(node); }
+
+ loco::Node *bw_cell_to_input_weights(void) const { return at(26)->node(); }
+ void bw_cell_to_input_weights(loco::Node *node) { at(26)->node(node); }
+ loco::Node *bw_cell_to_forget_weights(void) const { return at(27)->node(); }
+ void bw_cell_to_forget_weights(loco::Node *node) { at(27)->node(node); }
+ loco::Node *bw_cell_to_output_weights(void) const { return at(28)->node(); }
+ void bw_cell_to_output_weights(loco::Node *node) { at(28)->node(node); }
+
+ loco::Node *bw_input_gate_bias(void) const { return at(29)->node(); }
+ void bw_input_gate_bias(loco::Node *node) { at(29)->node(node); }
+ loco::Node *bw_forget_gate_bias(void) const { return at(30)->node(); }
+ void bw_forget_gate_bias(loco::Node *node) { at(30)->node(node); }
+ loco::Node *bw_cell_gate_bias(void) const { return at(31)->node(); }
+ void bw_cell_gate_bias(loco::Node *node) { at(31)->node(node); }
+ loco::Node *bw_output_gate_bias(void) const { return at(32)->node(); }
+ void bw_output_gate_bias(loco::Node *node) { at(32)->node(node); }
+
+ loco::Node *bw_projection_weights(void) const { return at(33)->node(); }
+ void bw_projection_weights(loco::Node *node) { at(33)->node(node); }
+ loco::Node *bw_projection_bias(void) const { return at(34)->node(); }
+ void bw_projection_bias(loco::Node *node) { at(34)->node(node); }
+
+ loco::Node *fw_activation_state(void) const { return at(35)->node(); }
+ void fw_activation_state(loco::Node *node) { at(35)->node(node); }
+ loco::Node *fw_cell_state(void) const { return at(36)->node(); }
+ void fw_cell_state(loco::Node *node) { at(36)->node(node); }
+
+ loco::Node *bw_activation_state(void) const { return at(37)->node(); }
+ void bw_activation_state(loco::Node *node) { at(37)->node(node); }
+ loco::Node *bw_cell_state(void) const { return at(38)->node(); }
+ void bw_cell_state(loco::Node *node) { at(38)->node(node); }
+
+ loco::Node *auxillary_input(void) const { return at(39)->node(); }
+ void auxillary_input(loco::Node *node) { at(39)->node(node); }
+ loco::Node *fw_auxillary_input_to_input_weights(void) const { return at(40)->node(); }
+ void fw_auxillary_input_to_input_weights(loco::Node *node) { at(40)->node(node); }
+ loco::Node *fw_auxillary_input_to_forget_weights(void) const { return at(41)->node(); }
+ void fw_auxillary_input_to_forget_weights(loco::Node *node) { at(41)->node(node); }
+ loco::Node *fw_auxillary_input_to_cell_weights(void) const { return at(42)->node(); }
+ void fw_auxillary_input_to_cell_weights(loco::Node *node) { at(42)->node(node); }
+ loco::Node *fw_auxillary_input_to_output_weights(void) const { return at(43)->node(); }
+ void fw_auxillary_input_to_output_weights(loco::Node *node) { at(43)->node(node); }
+ loco::Node *bw_auxillary_input_to_input_weights(void) const { return at(44)->node(); }
+ void bw_auxillary_input_to_input_weights(loco::Node *node) { at(44)->node(node); }
+ loco::Node *bw_auxillary_input_to_forget_weights(void) const { return at(45)->node(); }
+ void bw_auxillary_input_to_forget_weights(loco::Node *node) { at(45)->node(node); }
+ loco::Node *bw_auxillary_input_to_cell_weights(void) const { return at(46)->node(); }
+ void bw_auxillary_input_to_cell_weights(loco::Node *node) { at(46)->node(node); }
+ loco::Node *bw_auxillary_input_to_output_weights(void) const { return at(47)->node(); }
+ void bw_auxillary_input_to_output_weights(loco::Node *node) { at(47)->node(node); }
+
+public:
+ float cell_clip(void) const { return _cell_clip; }
+ void cell_clip(float cell_clip) { _cell_clip = cell_clip; }
+ float proj_clip(void) const { return _proj_clip; }
+ void proj_clip(float proj_clip) { _proj_clip = proj_clip; }
+ bool merge_outputs(void) const { return _merge_outputs; }
+ void merge_outputs(bool merge_outputs) { _merge_outputs = merge_outputs; }
+ bool time_major(void) const { return _time_major; }
+ void time_major(bool time_major) { _time_major = time_major; }
+ bool asymmetric_quantize_inputs(void) const { return _asymmetric_quantize_inputs; }
+ void asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ _asymmetric_quantize_inputs = asymmetric_quantize_inputs;
+ }
+
+private:
+ float _cell_clip{0.0f};
+ float _proj_clip{0.0f};
+ bool _merge_outputs{false};
+ bool _time_major{false};
+ bool _asymmetric_quantize_inputs{false};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEBIDIRECTIONALSEQUENCE_LSTM_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_BIDIRECTIONAL_SEQUENCE_LSTM_OUT_H__
+#define __LUCI_IR_CIRCLE_BIDIRECTIONAL_SEQUENCE_LSTM_OUT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Virtual CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT in Circle
+ */
+class CircleBidirectionalSequenceLSTMOut final
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+public:
+ int32_t index(void) const { return _index; }
+ void index(int32_t index) { _index = index; }
+
+private:
+ int32_t _index{-1};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_BIDIRECTIONAL_SEQUENCE_LSTM_OUT_H__
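
Editor's sketch (not part of the patch): one way the two new headers above could be used together. The main LSTM node carries the attributes, and each consumer reads one result through a virtual Out node selected by index. The nodes()->create<...>() factory call mirrors the pattern that appears later in this diff; the umbrella include path and the meaning of index 0 are assumptions.

#include <loco.h>
#include <luci/IR/CircleNodes.h>

// Creates a BidirectionalSequenceLSTM node and selects its forward output.
void sketch_bidi_lstm(loco::Graph *g)
{
  auto lstm = g->nodes()->create<luci::CircleBidirectionalSequenceLSTM>();
  lstm->merge_outputs(false); // keep forward/backward outputs separate
  lstm->time_major(false);
  lstm->cell_clip(0.0f);      // 0.0f: no cell clipping
  lstm->proj_clip(0.0f);

  // Each virtual Out node picks one output of the multi-output op.
  auto fw_out = g->nodes()->create<luci::CircleBidirectionalSequenceLSTMOut>();
  fw_out->input(lstm);
  fw_out->index(0);           // assumed: 0 selects the forward output
}
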
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include "luci/IR/VariadicArityNode.h"
#include <cassert>
* @brief CONCATENATION in Circle
*/
class CircleConcatenation final
- : public VariadicArityNode<CircleNodeImpl<CircleOpcode::CONCATENATION>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ : public VariadicArityNode<CircleNodeImpl<CircleOpcode::CONCATENATION>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
CircleConcatenation(uint32_t arity)
- : VariadicArityNode<CircleNodeImpl<CircleOpcode::CONCATENATION>>(arity)
+ : VariadicArityNode<CircleNodeImpl<CircleOpcode::CONCATENATION>>(arity)
{
// TODO Support when arity is 0
assert(arity >= 1);
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include <loco/IR/DataTypeTraits.h>
*/
class CircleConst final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLECONST>>
{
-public:
- CircleConst() = default;
-
public:
template <loco::DataType DT> uint32_t size(void) const;
template <loco::DataType DT> void size(uint32_t size);
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrDilation.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief CONV_2D in Circle
*/
class CircleConv2D final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::CONV_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
Dilation *dilation(void) { return &_dilation; }
private:
- Padding _padding = Padding::UNDEFINED;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
Dilation _dilation;
};
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
class CircleCustom final : public VariadicArityNode<CircleNodeImpl<CircleOpcode::CUSTOM>>
{
public:
- CircleCustom(uint32_t arity) : VariadicArityNode<CircleNodeImpl<CircleOpcode::CUSTOM>>(arity)
+ CircleCustom(uint32_t arity, uint32_t out)
+ : VariadicArityNode<CircleNodeImpl<CircleOpcode::CUSTOM>>(arity), _output_count(out)
{
// TODO Support when arity is 0
assert(arity >= 1);
+ assert(out > 0);
}
public:
uint32_t numInputs(void) const { return arity(); }
+ uint32_t numOutputs(void) const { return _output_count; }
public:
Node *inputs(uint32_t index) const { return at(index)->node(); }
void inputs(uint32_t index, Node *node) { at(index)->node(node); }
+public:
const std::vector<uint8_t> &custom_options(void) const { return _custom_options; }
void custom_options(const std::vector<uint8_t> &custom_options)
{
private:
std::vector<uint8_t> _custom_options;
std::string _custom_code;
+ uint32_t _output_count{0};
};
} // namespace luci
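
Editor's sketch (assumption, not from the patch): with the new two-argument constructor, the output count is fixed at creation time and exposed via numOutputs(), so a builder no longer has to infer it elsewhere. The umbrella include path is an assumption; all accessors used are the ones shown above.

#include <cassert>
#include <cstdint>
#include <vector>
#include <loco.h>
#include <luci/IR/CircleNodes.h>

// Creates a 2-input, 1-output custom op node.
void sketch_custom(loco::Graph *g, loco::Node *in0, loco::Node *in1)
{
  auto custom = g->nodes()->create<luci::CircleCustom>(2, 1);
  custom->inputs(0, in0);
  custom->inputs(1, in1);
  custom->custom_options(std::vector<uint8_t>{}); // e.g. serialized attributes
  assert(custom->numInputs() == 2);
  assert(custom->numOutputs() == 1);
}
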
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief Virtual CIRCLECUSTOMOUT in Circle
*/
class CircleCustomOut final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLECUSTOMOUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLECUSTOMOUT>>
{
-public:
- CircleCustomOut() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief DEPTH_TO_SPACE in Circle
*/
class CircleDepthToSpace final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::DEPTH_TO_SPACE>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::DEPTH_TO_SPACE>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
public:
- int block_size(void) const { return _block_size; }
- void block_size(int block_size) { _block_size = block_size; }
+ int32_t block_size(void) const { return _block_size; }
+ void block_size(int32_t block_size) { _block_size = block_size; }
private:
- int _block_size{0};
+ int32_t _block_size{0};
};
} // namespace luci
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief DEPTHWISE_CONV_2D in Circle
*/
class CircleDepthwiseConv2D final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::DEPTHWISE_CONV_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::DEPTHWISE_CONV_2D>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
Dilation *dilation(void) { return &_dilation; }
private:
- Padding _padding = Padding::UNDEFINED;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
- int32_t _depth_multiplier = 0;
+ int32_t _depth_multiplier{0};
Dilation _dilation;
};
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief DIV in Circle
*/
class CircleDiv final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::DIV>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
-public:
- CircleDiv() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleElu final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::ELU>>
{
-public:
- CircleElu() = default;
-
public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleExpandDims final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::EXPAND_DIMS>>
{
-public:
- CircleExpandDims() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_FAKE_QUANT_H__
+#define __LUCI_IR_CIRCLE_FAKE_QUANT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief FAKE_QUANT in Circle
+ * @note 'inputs' came from TF.quantize.fake_quant_from_min_max_vars
+ */
+class CircleFakeQuant final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::FAKE_QUANT>>
+{
+public:
+ loco::Node *inputs(void) const { return at(0)->node(); }
+ void inputs(loco::Node *node) { at(0)->node(node); }
+
+public:
+ float min(void) const { return _min; }
+ void min(float min) { _min = min; }
+
+ float max(void) const { return _max; }
+ void max(float max) { _max = max; }
+
+ int32_t num_bits(void) const { return _num_bits; }
+ void num_bits(int32_t num_bits) { _num_bits = num_bits; }
+
+ bool narrow_range(void) const { return _narrow_range; }
+ void narrow_range(bool narrow_range) { _narrow_range = narrow_range; }
+
+private:
+ float _min{0.0f};
+ float _max{0.0f};
+ int32_t _num_bits{0};
+ bool _narrow_range{false};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_FAKE_QUANT_H__
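
Editor's sketch (assumption, not from the patch): populating the new FAKE_QUANT node with the attribute setters defined above. The umbrella include path is an assumption.

#include <loco.h>
#include <luci/IR/CircleNodes.h>

// Builds a FAKE_QUANT node that simulates 8-bit quantization over [-6, 6].
void sketch_fake_quant(loco::Graph *g, loco::Node *features)
{
  auto fq = g->nodes()->create<luci::CircleFakeQuant>();
  fq->inputs(features);  // single operand, stored at index 0
  fq->min(-6.0f);
  fq->max(6.0f);
  fq->num_bits(8);
  fq->narrow_range(false);
}
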
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief FULLY_CONNECTED in Circle
*/
class CircleFullyConnected final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::FULLY_CONNECTED>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::FULLY_CONNECTED>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
enum class WeightsFormat
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void axis(int32_t axis) { _axis = axis; }
private:
- int32_t _axis = 0;
+ int32_t _axis{0};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief GREATER EQUAL in Circle
*/
class CircleGreaterEqual final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::GREATER_EQUAL>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::GREATER_EQUAL>>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
{
public:
CircleIf(uint32_t arity, uint32_t out)
- : VariadicArityNode<CircleNodeImpl<CircleOpcode::IF>>(arity + 1), _output_count(out)
+ : VariadicArityNode<CircleNodeImpl<CircleOpcode::IF>>(arity + 1), _output_count(out)
{
assert(arity > 0);
assert(out > 0);
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleIfOut final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEIFOUT>>
{
-public:
- CircleIfOut() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include <loco/IR/DataTypeTraits.h>
#include <loco/IR/GraphInputIndex.h>
*/
class CircleInput final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEINPUT>>
{
-public:
- CircleInput() = default;
-
public:
void index(const loco::GraphInputIndex &index);
loco::GraphInputIndex index(void) const;
bool indexed(void) const { return _index != -1; }
private:
- int64_t _index = -1; // Uninitialized
+ int64_t _index{-1}; // Uninitialized
};
} // namespace luci
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief INSTANCE_NORM in Circle
*/
class CircleInstanceNorm final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::INSTANCE_NORM>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::INSTANCE_NORM>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
/// @note Currently only FLOAT32 is supported as the input node
loco::Node *beta(void) const { return at(2)->node(); }
void beta(loco::Node *node) { at(2)->node(node); }
+public:
float epsilon() const { return _epsilon; }
void epsilon(float epsilon) { _epsilon = epsilon; }
private:
- float _epsilon = 1e-05;
+ float _epsilon{1e-05};
};
} // namespace luci
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief L2_NORMALIZATION in Circle
*/
class CircleL2Normalize final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::L2_NORMALIZATION>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::L2_NORMALIZATION>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief L2_POOL_2D in Circle
*/
class CircleL2Pool2D final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::L2_POOL_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
-public:
- CircleL2Pool2D() : _padding(Padding::UNDEFINED) { /* empty */}
-
public:
loco::Node *value(void) const { return at(0)->node(); }
void value(loco::Node *node) { at(0)->node(node); }
+public:
Padding padding() const { return _padding; }
void padding(Padding padding) { _padding = padding; }
Stride *stride(void) { return &_stride; }
private:
- Padding _padding;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
Filter _filter;
};
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleLeakyRelu final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::LEAKY_RELU>>
{
-public:
- CircleLeakyRelu() = default;
-
public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
+public:
float alpha() const { return _alpha; }
void alpha(float alpha) { _alpha = alpha; }
private:
- float _alpha = 0.2f;
+ float _alpha{0.2f};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief LOCAL_RESPONSE_NORMALIZATION in Circle
*/
class CircleLocalResponseNormalization final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::LOCAL_RESPONSE_NORMALIZATION>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::LOCAL_RESPONSE_NORMALIZATION>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleLogistic final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::LOGISTIC>>
{
-public:
- CircleLogistic() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief MATRIX_SET_DIAG in Circle
*/
class CircleMatrixSetDiag final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MATRIX_SET_DIAG>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MATRIX_SET_DIAG>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief MAX_POOL_2D in Circle
*/
class CircleMaxPool2D final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::MAX_POOL_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
-public:
- CircleMaxPool2D() : _padding(Padding::UNDEFINED) { /* empty */}
-
public:
loco::Node *value(void) const { return at(0)->node(); }
void value(loco::Node *node) { at(0)->node(node); }
+public:
Padding padding() const { return _padding; }
void padding(Padding padding) { _padding = padding; }
Stride *stride(void) { return &_stride; }
private:
- Padding _padding;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
Filter _filter;
};
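
Editor's sketch (assumption): configuring the pooling node now that the padding default has moved into the member initializer. The w()/h() accessors on Stride/Filter, the filter() getter, and the fusedActivationFunction() mixin accessor are not shown in this diff and are assumed from luci's attribute headers.

#include <loco.h>
#include <luci/IR/CircleNodes.h>

// 2x2 max pooling with stride 2 and VALID padding.
void sketch_max_pool(loco::Graph *g, loco::Node *value)
{
  auto pool = g->nodes()->create<luci::CircleMaxPool2D>();
  pool->value(value);
  pool->padding(luci::Padding::VALID); // must be set; default is UNDEFINED
  pool->stride()->w(2);
  pool->stride()->h(2);
  pool->filter()->w(2);
  pool->filter()->h(2);
  pool->fusedActivationFunction(luci::FusedActFunc::NONE);
}
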
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include "luci/IR/AttrMirrorPadMode.h"
namespace luci
*/
class CircleMirrorPad final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MIRROR_PAD>>
{
-public:
- CircleMirrorPad() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief MUL in Circle
*/
class CircleMul final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MUL>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief NON_MAX_SUPPRESSION_V4 in Circle
*/
class CircleNonMaxSuppressionV4 final
- : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V4>>
+ : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V4>>
{
public:
loco::Node *boxes(void) const { return at(0)->node(); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief Virtual NONMAXSUPPRESSIONV4OUT in Circle
*/
class CircleNonMaxSuppressionV4Out final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT>>
{
-public:
- CircleNonMaxSuppressionV4Out() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief NON_MAX_SUPPRESSION_V5 in Circle
*/
class CircleNonMaxSuppressionV5 final
- : public FixedArityNode<6, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V5>>
+ : public FixedArityNode<6, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V5>>
{
public:
loco::Node *boxes(void) const { return at(0)->node(); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief Virtual NONMAXSUPPRESSIONV5OUT in Circle
*/
class CircleNonMaxSuppressionV5Out final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT>>
{
-public:
- CircleNonMaxSuppressionV5Out() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void axis(int32_t axis) { _axis = axis; }
private:
- int32_t _axis = -1;
+ int32_t _axis{-1};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include <loco/IR/GraphOutputIndex.h>
class CircleOutput final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUT>>
{
public:
- CircleOutput() = default;
-
void index(const loco::GraphOutputIndex &index);
loco::GraphOutputIndex index(void) const;
void from(loco::Node *node) { at(0)->node(node); }
private:
- int64_t _index = -1; // Uninitialized
+ int64_t _index{-1}; // Uninitialized
};
/**
*/
// TODO remove CircleOutputDummy
class CircleOutputDummy final
- : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUTDUMMY>>
+ : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUTDUMMY>>
{
public:
CircleOutputDummy() = default;
* @brief CircleOutputExclude is used to specify nodes that are not exported
*/
class CircleOutputExclude final
- : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUTEXCLUDE>>
+ : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUTEXCLUDE>>
{
public:
CircleOutputExclude() = default;
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CirclePRelu final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::PRELU>>
{
-public:
- CirclePRelu() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CirclePad final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::PAD>>
{
-public:
- CirclePad() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CirclePadV2 final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::PADV2>>
{
-public:
- CirclePadV2() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CirclePow final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::POW>>
{
-public:
- CirclePow() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleRelu final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RELU>>
{
-public:
- CircleRelu() = default;
-
public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleRelu6 final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RELU6>>
{
-public:
- CircleRelu6() = default;
-
public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleReluN1To1 final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RELU_N1_TO_1>>
{
-public:
- CircleReluN1To1() = default;
-
public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleReshape final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESHAPE>>
{
-public:
- CircleReshape() = default;
-
public:
loco::Node *tensor(void) const { return at(0)->node(); }
void tensor(loco::Node *node) { at(0)->node(node); }
// NOTE shape is optional and can be CircleConst or any other type
- // and also can be CircleOutputDummy when reshape option does not exist
+ // and also should be CircleOutputDummy when reshape option does not exist
loco::Node *shape(void) const { return at(1)->node(); }
void shape(loco::Node *node) { at(1)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief RESIZE_BILINEAR in Circle
*/
class CircleResizeBilinear final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESIZE_BILINEAR>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESIZE_BILINEAR>>
{
-public:
- CircleResizeBilinear() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
loco::Node *size(void) const { return at(1)->node(); }
void size(loco::Node *node) { at(1)->node(node); }
+public:
bool align_corners() const { return _align_corners; }
void align_corners(bool value) { _align_corners = value; }
void half_pixel_centers(bool value) { _half_pixel_centers = value; }
private:
- bool _align_corners = false;
- bool _half_pixel_centers = false;
+ bool _align_corners{false};
+ bool _half_pixel_centers{false};
};
} // namespace luci
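
Editor's sketch (assumption, not from the patch): a resize node built with the accessors shown above; the umbrella include path is an assumption.

#include <loco.h>
#include <luci/IR/CircleNodes.h>

// RESIZE_BILINEAR with half-pixel centers and no corner alignment.
void sketch_resize(loco::Graph *g, loco::Node *input, loco::Node *size)
{
  auto resize = g->nodes()->create<luci::CircleResizeBilinear>();
  resize->input(input);
  resize->size(size); // 1-D tensor holding the target [height, width]
  resize->align_corners(false);
  resize->half_pixel_centers(true);
}
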
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief RESIZE_NEAREST_NEIGHBOR in Circle
*/
class CircleResizeNearestNeighbor final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESIZE_NEAREST_NEIGHBOR>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESIZE_NEAREST_NEIGHBOR>>
{
-public:
- CircleResizeNearestNeighbor() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
loco::Node *size(void) const { return at(1)->node(); }
void size(loco::Node *node) { at(1)->node(node); }
+public:
bool align_corners() const { return _align_corners; }
void align_corners(bool value) { _align_corners = value; }
private:
- bool _align_corners = false;
+ bool _align_corners{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief REVERSE_SEQUENCE in Circle
*/
class CircleReverseSequence final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::REVERSE_SEQUENCE>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::REVERSE_SEQUENCE>>
{
-public:
- CircleReverseSequence() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
void seq_lengths(loco::Node *node) { at(1)->node(node); }
public:
- int seq_axis(void) const { return _seq_axis; }
- void seq_axis(int seq_axis) { _seq_axis = seq_axis; }
+ int32_t seq_axis(void) const { return _seq_axis; }
+ void seq_axis(int32_t seq_axis) { _seq_axis = seq_axis; }
- int batch_axis(void) const { return _batch_axis; }
- void batch_axis(int batch_axis) { _batch_axis = batch_axis; }
+ int32_t batch_axis(void) const { return _batch_axis; }
+ void batch_axis(int32_t batch_axis) { _batch_axis = batch_axis; }
private:
- int _seq_axis{0};
- int _batch_axis{0};
+ int32_t _seq_axis{0};
+ int32_t _batch_axis{0};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleRound final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::ROUND>>
{
-public:
- CircleRound() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleRsqrt final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RSQRT>>
{
-public:
- CircleRsqrt() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleSegmentSum final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::SEGMENT_SUM>>
{
-public:
- CircleSegmentSum() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleSelect final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::SELECT>>
{
-public:
- CircleSelect() = default;
-
public:
loco::Node *condition(void) const { return at(0)->node(); }
void condition(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleSelectV2 final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::SELECT_V2>>
{
-public:
- CircleSelectV2() = default;
-
public:
loco::Node *condition(void) const { return at(0)->node(); }
void condition(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleShape final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SHAPE>>
{
-public:
- CircleShape() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief SPACE_TO_BATCH_ND in Circle
*/
class CircleSpaceToBatchND final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::SPACE_TO_BATCH_ND>>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::SPACE_TO_BATCH_ND>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief SPACE_TO_DEPTH in Circle
*/
class CircleSpaceToDepth final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SPACE_TO_DEPTH>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SPACE_TO_DEPTH>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
public:
- int block_size(void) const { return _block_size; }
- void block_size(int block_size) { _block_size = block_size; }
+ int32_t block_size(void) const { return _block_size; }
+ void block_size(int32_t block_size) { _block_size = block_size; }
private:
- int _block_size{0};
+ int32_t _block_size{0};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief SPARSE_TO_DENSE in Circle
*/
class CircleSparseToDense final
- : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::SPARSE_TO_DENSE>>
+ : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::SPARSE_TO_DENSE>>
{
public:
loco::Node *indices(void) const { return at(0)->node(); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleSplitOut final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLESPLITOUT>>
{
-public:
- CircleSplitOut() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief Virtual CIRCLESPLITVOUT in Circle
*/
class CircleSplitVOut final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLESPLITVOUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLESPLITVOUT>>
{
-public:
- CircleSplitVOut() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleSqrt final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SQRT>>
{
-public:
- CircleSqrt() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleSquare final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SQUARE>>
{
-public:
- CircleSquare() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief SQUARED_DIFFERENCE in Circle
*/
class CircleSquaredDifference final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::SQUARED_DIFFERENCE>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::SQUARED_DIFFERENCE>>
{
-public:
- CircleSquaredDifference() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleSqueeze final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SQUEEZE>>
{
-public:
- CircleSqueeze() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief STRIDED_SLICE in Circle
*/
class CircleStridedSlice final
- : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::STRIDED_SLICE>>
+ : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::STRIDED_SLICE>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief SUB in Circle
*/
class CircleSub final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::SUB>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
-public:
- CircleSub() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleTanh final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::TANH>>
{
-public:
- CircleTanh() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleTile final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::TILE>>
{
-public:
- CircleTile() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleTopKV2 final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::TOPK_V2>>
{
-public:
- CircleTopKV2() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief Virtual CIRCLETOPKV2OUT in Circle
*/
class CircleTopKV2Out final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLETOPKV2OUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLETOPKV2OUT>>
{
-public:
- CircleTopKV2Out() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
class CircleTranspose final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::TRANSPOSE>>
{
public:
- CircleTranspose() = default;
-
-public:
- /// @brief Get the input node to transpose
loco::Node *a(void) const { return at(0)->node(); }
-
- /// @brief Set the input node to transpose
void a(loco::Node *node) { at(0)->node(node); }
loco::Node *perm(void) const { return at(1)->node(); }
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* 'out' actually means 'out' and 'in' of this node.
*/
class CircleTransposeConv final
- : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
loco::Node *inputSizes(void) const { return at(0)->node(); }
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief UNIDIRECTIONAL_SEQUENCE_LSTM in Circle
*/
class CircleUnidirectionalSequenceLSTM final
- : public FixedArityNode<24, CircleNodeImpl<CircleOpcode::UNIDIRECTIONAL_SEQUENCE_LSTM>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ : public FixedArityNode<24, CircleNodeImpl<CircleOpcode::UNIDIRECTIONAL_SEQUENCE_LSTM>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
}
private:
- float _cell_clip = 0.0f;
- float _proj_clip = 0.0f;
- bool _time_major = false;
- bool _asymmetric_quantize_inputs = false;
+ float _cell_clip{0.0f};
+ float _proj_clip{0.0f};
+ bool _time_major{false};
+ bool _asymmetric_quantize_inputs{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
public:
loco::DataType idx_out_type(void) const { return _idx_out_type; }
- void output_type(loco::DataType ot) { _idx_out_type = ot; }
+ void idx_out_type(loco::DataType ot) { _idx_out_type = ot; }
private:
loco::DataType _idx_out_type{loco::DataType::S32};
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief Virtual CIRCLEUNIQUEOUT in Circle
*/
class CircleUniqueOut final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEUNIQUEOUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEUNIQUEOUT>>
{
-public:
- CircleUniqueOut() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleUnpack final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::UNPACK>>
{
-public:
- CircleUnpack() = default;
-
public:
loco::Node *value(void) const { return at(0)->node(); }
void value(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief Virtual CIRCLEUNPACKOUT in Circle
*/
class CircleUnpackOut final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEUNPACKOUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEUNPACKOUT>>
{
-public:
- CircleUnpackOut() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include <cassert>
*/
class CircleWhere final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::WHERE>>
{
-public:
- CircleWhere() = default;
-
public:
loco::Node *condition() const { return at(0)->node(); }
void condition(loco::Node *node) { at(0)->node(node); }
{
public:
CircleWhile(uint32_t arity, uint32_t out)
- : VariadicArityNode<CircleNodeImpl<CircleOpcode::WHILE>>(arity), _output_count(out)
+ : VariadicArityNode<CircleNodeImpl<CircleOpcode::WHILE>>(arity), _output_count(out)
{
assert(arity > 0);
assert(out > 0);
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleWhileOut final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEWHILEOUT>>
{
-public:
- CircleWhileOut() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
class CircleZerosLike final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::ZEROS_LIKE>>
{
public:
- CircleZerosLike() = default;
-
-public:
- /// @brief Get the input node
loco::Node *input(void) const { return at(0)->node(); }
-
- /// @brief Set the input node
void input(loco::Node *node) { at(0)->node(node); }
};
public:
SparseIndexVector() = default;
SparseIndexVector(const SparseIndexVectorType &type, const std::vector<int32_t> &sparse_index_vec)
- : _type{type}
+ : _type{type}
{
switch (type)
{
case SparseIndexVectorType::I32:
{
_vec_ptr = static_cast<void *>(
- new std::vector<int32_t>(sparse_index_vec.begin(), sparse_index_vec.end()));
+ new std::vector<int32_t>(sparse_index_vec.begin(), sparse_index_vec.end()));
break;
}
case SparseIndexVectorType::U16:
case SparseIndexVectorType::I32:
{
const std::vector<int32_t> *vec =
- static_cast<const std::vector<int32_t> *>(sparse_index_vec);
+ static_cast<const std::vector<int32_t> *>(sparse_index_vec);
_vec_ptr = static_cast<void *>(new std::vector<int32_t>(vec->begin(), vec->end()));
break;
}
case SparseIndexVectorType::U16:
{
const std::vector<uint16_t> *vec =
- static_cast<const std::vector<uint16_t> *>(sparse_index_vec);
+ static_cast<const std::vector<uint16_t> *>(sparse_index_vec);
_vec_ptr = static_cast<void *>(new std::vector<uint16_t>(vec->begin(), vec->end()));
break;
}
case SparseIndexVectorType::U8:
{
const std::vector<uint8_t> *vec =
- static_cast<const std::vector<uint8_t> *>(sparse_index_vec);
+ static_cast<const std::vector<uint8_t> *>(sparse_index_vec);
_vec_ptr = static_cast<void *>(new std::vector<uint8_t>(vec->begin(), vec->end()));
break;
}
}
SparseIndexVector(const SparseIndexVector &sparse_index_vec)
- : SparseIndexVector(sparse_index_vec._type, sparse_index_vec._vec_ptr)
+ : SparseIndexVector(sparse_index_vec._type, sparse_index_vec._vec_ptr)
{
}
SparseIndexVector(SparseIndexVector &&sparse_index_vec)
- : _type{sparse_index_vec._type}, _vec_ptr{std::exchange(sparse_index_vec._vec_ptr, nullptr)}
+ : _type{sparse_index_vec._type}, _vec_ptr{std::exchange(sparse_index_vec._vec_ptr, nullptr)}
{
}
const std::vector<uint16_t> *as_uint16_vector(void) const
{
return _type == SparseIndexVectorType::U16
- ? static_cast<const std::vector<uint16_t> *>(_vec_ptr)
- : nullptr;
+ ? static_cast<const std::vector<uint16_t> *>(_vec_ptr)
+ : nullptr;
}
const std::vector<uint8_t> *as_uint8_vector(void) const
{
}
DimMetaData(DimensionType format, int32_t dense_size, const SparseIndexVector &array_segments,
const SparseIndexVector &array_indices)
- : _format{format}, _dense_size{dense_size}, _array_segments{array_segments},
- _array_indices{array_indices}
+ : _format{format}, _dense_size{dense_size}, _array_segments{array_segments}, _array_indices{
+ array_indices}
{
// DO NOTHING
}
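
Editor's sketch (assumption): building sparsity metadata for one dimension with the constructors shown above. The DimensionType enumerator and the SparsityParam.h header name are assumptions taken from luci's sparsity support.

#include <cstdint>
#include <vector>
#include <luci/IR/SparsityParam.h>

// CSR-style metadata for one sparse dimension: segment offsets plus indices.
luci::DimMetaData make_sparse_dim(void)
{
  std::vector<int32_t> segments{0, 2, 4};
  std::vector<int32_t> indices{0, 1, 0, 2};
  luci::SparseIndexVector seg{luci::SparseIndexVectorType::I32, segments};
  luci::SparseIndexVector idx{luci::SparseIndexVectorType::I32, indices};
  return luci::DimMetaData{luci::DimensionType::SPARSE_CSR, 0, seg, idx};
}
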
*/
#include "luci/IR/CircleDialect.h"
+#include "luci/IR/DeadNodeQueryService.h"
#include "luci/IR/Nodes/CircleInput.h"
#include "luci/IR/Nodes/CircleOutput.h"
#include <loco/IR/GraphInputIndex.h>
#include <loco/IR/GraphOutputIndex.h>
-#include "DeadNodeQueryService.h"
-
#include <cassert>
#include <memory>
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This is to validate CircleNodeMixins.h
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
-void set_new_shape(CircleReshape *node, int32_t *base, uint32_t size)
-{
- // Check node does not have both of new shape infos
- LUCI_ASSERT(node->shape() == nullptr, "node already has shape input");
- LUCI_ASSERT(node->newShape()->rank() == 0, "node already has newShape attribute");
-
- const loco::DataType S32 = loco::DataType::S32;
-
- // Set 2nd input as CircleConst
- auto const_shape_node = node->graph()->nodes()->create<CircleConst>();
- const_shape_node->rank(1);
- const_shape_node->dim(0) = size;
- const_shape_node->dtype(S32);
- const_shape_node->size<S32>(size);
- const_shape_node->shape_status(luci::ShapeStatus::VALID);
- for (uint32_t axis = 0; axis < size; ++axis)
- const_shape_node->at<S32>(axis) = base[axis];
- node->shape(const_shape_node);
-
- // Set newShape attribute
- node->newShape()->rank(size);
- for (uint32_t axis = 0; axis < size; ++axis)
- node->newShape()->dim(axis) = base[axis];
-}
-
void link(loco::GraphOutput *output, CircleOutput *node) { node->index(output->index()); }
CircleOutput *output_node(loco::Graph *g, const loco::GraphOutputIndex &index)
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/IR/CircleShapeSignature.h"
-
-namespace luci
-{
-
-bool operator==(const ShapeSignature &lhs, const ShapeSignature &rhs)
-{
- if (lhs.rank() != rhs.rank())
- return false;
-
- for (uint32_t i = 0; i < lhs.rank(); ++i)
- if (lhs.dim(i) != rhs.dim(i))
- return false;
-
- return true;
-}
-
-} // namespace luci
* limitations under the License.
*/
-#include "DeadNodeQueryService.h"
-
#include "luci/IR/CircleNodeVisitor.h"
+#include "luci/IR/DeadNodeQueryService.h"
#include <loco/IR/Graph.h>
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_LANG_DEADNODEQUERYSERVICE_H__
-#define __LUCI_LANG_DEADNODEQUERYSERVICE_H__
-
-#include <logo/DeadNodeQueryService.h>
-
-#include <loco/IR/Node.h>
-
-namespace luci
-{
-
-struct DeadNodeQueryServiceImpl final : public logo::DeadNodeQueryService
-{
- bool isDeadNode(loco::Node *node) final;
-};
-
-} // namespace luci
-
-#endif // __LUCI_LANG_DEADNODEQUERYSERVICE_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// This is to validate LuciNodeMixins.h
-#include "luci/IR/LuciNodeMixins.h"
luci::CircleBatchMatMul batchmatmul_node;
ASSERT_EQ(luci::CircleDialect::get(), batchmatmul_node.dialect());
- ASSERT_EQ(luci::CircleOpcode::BATCHMATMUL, batchmatmul_node.opcode());
+ ASSERT_EQ(luci::CircleOpcode::BATCH_MATMUL, batchmatmul_node.opcode());
ASSERT_EQ(nullptr, batchmatmul_node.x());
ASSERT_EQ(nullptr, batchmatmul_node.y());
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleBidirectionalSequenceLSTM.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleBidirectionalSequenceLSTMTest, constructor_P)
+{
+ luci::CircleBidirectionalSequenceLSTM trc_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), trc_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM, trc_node.opcode());
+
+ ASSERT_EQ(nullptr, trc_node.input());
+
+ ASSERT_EQ(nullptr, trc_node.fw_input_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_input_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_input_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_input_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.fw_recurrent_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_recurrent_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_recurrent_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_recurrent_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.fw_cell_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_cell_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_cell_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.fw_input_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.fw_forget_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.fw_cell_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.fw_output_gate_bias());
+
+ ASSERT_EQ(nullptr, trc_node.fw_projection_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_projection_bias());
+
+ ASSERT_EQ(nullptr, trc_node.bw_input_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_input_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_input_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_input_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.bw_recurrent_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_recurrent_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_recurrent_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_recurrent_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.bw_cell_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_cell_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_cell_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.bw_input_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.bw_forget_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.bw_cell_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.bw_output_gate_bias());
+
+ ASSERT_EQ(nullptr, trc_node.bw_projection_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_projection_bias());
+
+ ASSERT_EQ(nullptr, trc_node.fw_activation_state());
+ ASSERT_EQ(nullptr, trc_node.fw_cell_state());
+ ASSERT_EQ(nullptr, trc_node.bw_activation_state());
+ ASSERT_EQ(nullptr, trc_node.bw_cell_state());
+
+ ASSERT_EQ(nullptr, trc_node.auxillary_input());
+ ASSERT_EQ(nullptr, trc_node.fw_auxillary_input_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_auxillary_input_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_auxillary_input_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_auxillary_input_to_output_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_auxillary_input_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_auxillary_input_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_auxillary_input_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_auxillary_input_to_output_weights());
+
+ ASSERT_EQ(luci::FusedActFunc::UNDEFINED, trc_node.fusedActivationFunction());
+ ASSERT_EQ(0.f, trc_node.cell_clip());
+ ASSERT_EQ(0.f, trc_node.proj_clip());
+ ASSERT_EQ(false, trc_node.merge_outputs());
+ ASSERT_EQ(false, trc_node.time_major());
+ ASSERT_EQ(false, trc_node.asymmetric_quantize_inputs());
+}
+
+TEST(CircleBidirectionalSequenceLSTMTest, arity_NEG)
+{
+ luci::CircleBidirectionalSequenceLSTM trc_node;
+
+ ASSERT_NO_THROW(trc_node.arg(36));
+ ASSERT_THROW(trc_node.arg(48), std::out_of_range);
+}
+
+TEST(CircleBidirectionalSequenceLSTMTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleBidirectionalSequenceLSTM trc_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(trc_node.accept(&tv), std::exception);
+}
+
+TEST(CircleBidirectionalSequenceLSTMTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleBidirectionalSequenceLSTM trc_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(trc_node.accept(&tv), std::exception);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleConst.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleConstTest, constructor)
+{
+ luci::CircleConst const_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), const_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::CIRCLECONST, const_node.opcode());
+}
+
+TEST(CircleConstTest, dtype_size)
+{
+ luci::CircleConst const_node;
+
+ const_node.dtype(loco::DataType::S32);
+ const_node.size<loco::DataType::S32>(1);
+
+ ASSERT_EQ(loco::DataType::S32, const_node.dtype());
+ ASSERT_EQ(1, const_node.size<loco::DataType::S32>());
+}
+
+TEST(CircleConstTest, scalar)
+{
+ luci::CircleConst const_node;
+
+ const_node.dtype(loco::DataType::S32);
+ const_node.size<loco::DataType::S32>(1);
+ const_node.scalar<loco::DataType::S32>() = 1;
+
+ auto const &cs = const_node.scalar<loco::DataType::S32>();
+ ASSERT_EQ(1, cs);
+}
TEST(CircleCustomTest, constructor)
{
- luci::CircleCustom custom_node(2);
+ luci::CircleCustom custom_node(2, 1);
ASSERT_EQ(luci::CircleDialect::get(), custom_node.dialect());
ASSERT_EQ(luci::CircleOpcode::CUSTOM, custom_node.opcode());
ASSERT_EQ(2, custom_node.numInputs());
ASSERT_EQ(0, custom_node.custom_code().size());
+ ASSERT_EQ(1, custom_node.numOutputs());
}
TEST(CircleCustomTest, constructor_NEG)
{
- ASSERT_DEBUG_DEATH(luci::CircleCustom{0}, "");
+ ASSERT_DEBUG_DEATH(luci::CircleCustom(0, 0), "");
SUCCEED();
}
TEST(CircleCustomTest, invalidIndex_NEG)
{
- luci::CircleCustom custom_node(2);
+ luci::CircleCustom custom_node(2, 1);
EXPECT_ANY_THROW(custom_node.arg(5));
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleFakeQuant.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleFakeQuantTest, constructor_P)
+{
+ luci::CircleFakeQuant fakequant;
+
+ ASSERT_EQ(fakequant.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(fakequant.opcode(), luci::CircleOpcode::FAKE_QUANT);
+
+ ASSERT_EQ(nullptr, fakequant.inputs());
+ ASSERT_EQ(0.0f, fakequant.min());
+ ASSERT_EQ(0.0f, fakequant.max());
+ ASSERT_EQ(0, fakequant.num_bits());
+ ASSERT_FALSE(fakequant.narrow_range());
+}
#define CIRCLE_NODE(OPCODE, CLASS) \
case luci::CircleOpcode::OPCODE: \
return prefix + #OPCODE;
+#define CIRCLE_VNODE CIRCLE_NODE
#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
default:
break;
s.state(locop::NodeSummary::State::PartiallyKnown); \
return true; \
}
+#define CIRCLE_VNODE CIRCLE_NODE
#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
protected:
IMPLEMENT(luci::CircleAveragePool2D)
IMPLEMENT(luci::CircleBatchMatMul)
IMPLEMENT(luci::CircleBatchToSpaceND)
+ IMPLEMENT(luci::CircleBidirectionalSequenceLSTM)
IMPLEMENT(luci::CircleCast)
IMPLEMENT(luci::CircleCeil)
IMPLEMENT(luci::CircleConcatenation)
IMPLEMENT(luci::CircleElu)
IMPLEMENT(luci::CircleExp)
IMPLEMENT(luci::CircleExpandDims)
+ IMPLEMENT(luci::CircleFakeQuant)
IMPLEMENT(luci::CircleFill)
IMPLEMENT(luci::CircleFloor)
IMPLEMENT(luci::CircleFloorDiv)
return true;
}
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBidirectionalSequenceLSTM *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+
+ s.args().append("fw_input_to_input_weights", tbl->lookup(node->fw_input_to_input_weights()));
+ s.args().append("fw_input_to_forget_weights", tbl->lookup(node->fw_input_to_forget_weights()));
+ s.args().append("fw_input_to_cell_weights", tbl->lookup(node->fw_input_to_cell_weights()));
+ s.args().append("fw_input_to_output_weights", tbl->lookup(node->fw_input_to_output_weights()));
+
+ s.args().append("fw_recurrent_to_input_weights",
+ tbl->lookup(node->fw_recurrent_to_input_weights()));
+ s.args().append("fw_recurrent_to_forget_weights",
+ tbl->lookup(node->fw_recurrent_to_forget_weights()));
+ s.args().append("fw_recurrent_to_cell_weights",
+ tbl->lookup(node->fw_recurrent_to_cell_weights()));
+ s.args().append("fw_recurrent_to_output_weights",
+ tbl->lookup(node->fw_recurrent_to_output_weights()));
+
+ s.args().append("fw_cell_to_input_weights", tbl->lookup(node->fw_cell_to_input_weights()));
+ s.args().append("fw_cell_to_forget_weights", tbl->lookup(node->fw_cell_to_forget_weights()));
+ s.args().append("fw_cell_to_output_weights", tbl->lookup(node->fw_cell_to_output_weights()));
+
+ s.args().append("fw_input_gate_bias", tbl->lookup(node->fw_input_gate_bias()));
+ s.args().append("fw_forget_gate_bias", tbl->lookup(node->fw_forget_gate_bias()));
+ s.args().append("fw_cell_gate_bias", tbl->lookup(node->fw_cell_gate_bias()));
+ s.args().append("fw_output_gate_bias", tbl->lookup(node->fw_output_gate_bias()));
+
+ s.args().append("fw_projection_weights", tbl->lookup(node->fw_projection_weights()));
+ s.args().append("fw_projection_bias", tbl->lookup(node->fw_projection_bias()));
+
+ s.args().append("bw_input_to_input_weights", tbl->lookup(node->bw_input_to_input_weights()));
+ s.args().append("bw_input_to_forget_weights", tbl->lookup(node->bw_input_to_forget_weights()));
+ s.args().append("bw_input_to_cell_weights", tbl->lookup(node->bw_input_to_cell_weights()));
+ s.args().append("bw_input_to_output_weights", tbl->lookup(node->bw_input_to_output_weights()));
+
+ s.args().append("bw_recurrent_to_input_weights",
+ tbl->lookup(node->bw_recurrent_to_input_weights()));
+ s.args().append("bw_recurrent_to_forget_weights",
+ tbl->lookup(node->bw_recurrent_to_forget_weights()));
+ s.args().append("bw_recurrent_to_cell_weights",
+ tbl->lookup(node->bw_recurrent_to_cell_weights()));
+ s.args().append("bw_recurrent_to_output_weights",
+ tbl->lookup(node->bw_recurrent_to_output_weights()));
+
+ s.args().append("bw_cell_to_input_weights", tbl->lookup(node->bw_cell_to_input_weights()));
+ s.args().append("bw_cell_to_forget_weights", tbl->lookup(node->bw_cell_to_forget_weights()));
+ s.args().append("bw_cell_to_output_weights", tbl->lookup(node->bw_cell_to_output_weights()));
+
+ s.args().append("bw_input_gate_bias", tbl->lookup(node->bw_input_gate_bias()));
+ s.args().append("bw_forget_gate_bias", tbl->lookup(node->bw_forget_gate_bias()));
+ s.args().append("bw_cell_gate_bias", tbl->lookup(node->bw_cell_gate_bias()));
+ s.args().append("bw_output_gate_bias", tbl->lookup(node->bw_output_gate_bias()));
+
+ s.args().append("bw_projection_weights", tbl->lookup(node->bw_projection_weights()));
+ s.args().append("bw_projection_bias", tbl->lookup(node->bw_projection_bias()));
+
+ s.args().append("fw_activation_state", tbl->lookup(node->fw_activation_state()));
+ s.args().append("fw_cell_state", tbl->lookup(node->fw_cell_state()));
+ s.args().append("bw_activation_state", tbl->lookup(node->bw_activation_state()));
+ s.args().append("bw_cell_state", tbl->lookup(node->bw_cell_state()));
+
+ s.args().append("auxillary_input", tbl->lookup(node->auxillary_input()));
+ s.args().append("fw_auxillary_input_to_input_weights",
+ tbl->lookup(node->fw_auxillary_input_to_input_weights()));
+ s.args().append("fw_auxillary_input_to_forget_weights",
+ tbl->lookup(node->fw_auxillary_input_to_forget_weights()));
+ s.args().append("fw_auxillary_input_to_cell_weights",
+ tbl->lookup(node->fw_auxillary_input_to_cell_weights()));
+ s.args().append("fw_auxillary_input_to_output_weights",
+ tbl->lookup(node->fw_auxillary_input_to_output_weights()));
+ s.args().append("bw_auxillary_input_to_input_weights",
+ tbl->lookup(node->bw_auxillary_input_to_input_weights()));
+ s.args().append("bw_auxillary_input_to_forget_weights",
+ tbl->lookup(node->bw_auxillary_input_to_forget_weights()));
+ s.args().append("bw_auxillary_input_to_cell_weights",
+ tbl->lookup(node->bw_auxillary_input_to_cell_weights()));
+ s.args().append("bw_auxillary_input_to_output_weights",
+ tbl->lookup(node->bw_auxillary_input_to_output_weights()));
+
+ s.args().append("cell_clip", to_str(node->cell_clip()));
+ s.args().append("proj_clip", to_str(node->proj_clip()));
+ s.args().append("merge_outputs", to_str(node->merge_outputs()));
+ s.args().append("time_major", to_str(node->time_major()));
+ s.args().append("asymmetric_quantize_inputs", to_str(node->asymmetric_quantize_inputs()));
+
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCast *node,
locop::NodeSummary &s)
{
return true;
}
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFakeQuant *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("inputs", tbl->lookup(node->inputs()));
+ s.args().append("min", pepper::str(node->min()));
+ s.args().append("max", pepper::str(node->max()));
+ s.args().append("num_bits", pepper::str(node->num_bits()));
+ s.args().append("narrow_range", node->narrow_range() ? "true" : "false");
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFill *node,
locop::NodeSummary &s)
{
s.comments().append("Mem = " + ptr_to_str(node)); \
return summary(dynamic_cast<const CLASS *>(node), s); \
}
+#define CIRCLE_VNODE CIRCLE_NODE
#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
return false;
return summary_node(tbl(), node, s);
}
+bool CircleNodeSummaryBuilder::summary(const luci::CircleBidirectionalSequenceLSTM *node,
+ locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
bool CircleNodeSummaryBuilder::summary(const luci::CircleCast *node, locop::NodeSummary &s) const
{
return summary_node(tbl(), node, s);
return summary_node(tbl(), node, s);
}
+bool CircleNodeSummaryBuilder::summary(const luci::CircleFakeQuant *node,
+ locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleFill *node, locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
bool CircleNodeSummaryBuilder::summary(const luci::CircleFloor *node, locop::NodeSummary &s) const
{
return use_x(tbl(), node, s);
return use_xy(tbl(), node, s);
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleFill *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
bool CircleNodeSummaryBuilder::summary(const luci::CircleFullyConnected *node,
locop::NodeSummary &s) const
{
--- /dev/null
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(luci_partition SHARED ${SOURCES})
+target_include_directories(luci_partition PRIVATE src)
+target_include_directories(luci_partition PUBLIC include)
+target_link_libraries(luci_partition PUBLIC luci_lang)
+target_link_libraries(luci_partition PRIVATE luci_service)
+target_link_libraries(luci_partition PRIVATE luci_log)
+target_link_libraries(luci_partition PRIVATE luci_logex)
+target_link_libraries(luci_partition PRIVATE mio_circle)
+target_link_libraries(luci_partition PRIVATE nncc_common)
+target_link_libraries(luci_partition PRIVATE oops)
+
+install(TARGETS luci_partition DESTINATION lib)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_partition_test ${TESTS})
+target_include_directories(luci_partition_test PRIVATE src)
+target_link_libraries(luci_partition_test luci_lang)
+target_link_libraries(luci_partition_test luci_partition)
+target_link_libraries(luci_partition_test luci_testhelper)
+target_link_libraries(luci_partition_test luci_service)
--- /dev/null
+# luci-partition
+
+`luci-partition` provides partitioning of a model into two or more sub-models,
+together with their connection configuration, while preserving the same
+computational results.
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_H__
+#define __LUCI_PARTITION_H__
+
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace luci
+{
+
+/**
+ * @brief PartitionTable holds partition information
+ */
+struct PartitionTable
+{
+ std::vector<std::string> groups;
+ std::string default_group;
+
+ // assign by opcode name: OPCODENAME=group
+ std::unordered_map<std::string /* OPCODENAME */, std::string /* group */> byopcodes;
+
+ // TODO add assign by OP name
+};
+
+/**
+ * @brief PartedModule holds partitioned module and group name
+ */
+struct PartedModule
+{
+ std::unique_ptr<Module> module;
+ // group name used to partition this module
+ std::string group;
+
+  // unique name (filename) of this module
+ std::string name;
+};
+
+struct PartedModules
+{
+ std::vector<PartedModule> pmodules;
+
+ // TODO add connections ?
+};
+
+/**
+ * @brief Method to do partitioning from module and PartitionTable to produce PartedModules
+ */
+PartedModules apply(Module *module, const PartitionTable &partition);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_H__
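For orientation only (this is not part of the change set above): a minimal sketch of how the interface declared in this header might be used, assuming the header is reachable as <luci/Partition.h>, that a luci::Module has already been loaded elsewhere, and that "CONV_2D" is a valid opcode name for a byopcodes entry.

// Sketch: partition an already-loaded module by opcode name.
#include <luci/Partition.h>

luci::PartedModules partition_example(luci::Module *module)
{
  luci::PartitionTable table;
  table.groups = {"cpu", "npu"};      // candidate groups for sub models
  table.default_group = "cpu";        // group for operators not assigned below
  table.byopcodes["CONV_2D"] = "npu"; // OPCODENAME=group style assignment

  // apply() returns the partitioned sub modules in PartedModules::pmodules
  return luci::apply(module, table);
}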
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleOpCode.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <mio/circle/schema_generated.h>
+
+namespace
+{
+
+using namespace luci;
+using namespace circle;
+
+class QueryOpCode final : public CircleNodeVisitor<BuiltinOperator>
+{
+public:
+// NOTE only circle operators may have BuiltinOperator_XXX
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) \
+ BuiltinOperator visit(const CIRCLE_CLASS *) final { return BuiltinOperator_##OPCODE; }
+#define CIRCLE_VNODE(OPCODE, CIRCLE_CLASS)
+
+#include "luci/IR/CircleNodes.lst"
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+
+ // NOTE only builtin operators should be called (NOT virtual nodes)
+};
+
+class QueryCircleName final : public luci::CircleNodeVisitor<const char *>
+{
+public:
+// NOTE provide names for circle virtual nodes
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS)
+#define CIRCLE_VNODE(OPCODE, CIRCLE_CLASS) \
+ const char *visit(const CIRCLE_CLASS *) final { return #OPCODE; }
+
+#include "luci/IR/CircleNodes.lst"
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+
+ // default is null
+ const char *visit(const luci::CircleNode *) final { return nullptr; }
+};
+
+} // namespace
+
+namespace luci
+{
+
+std::string opcode_name(const CircleNode *node)
+{
+ QueryCircleName qcn;
+ auto cname = node->accept(&qcn);
+ if (cname != nullptr)
+ return std::string(cname);
+
+ QueryOpCode qoc;
+ auto opcode = node->accept(&qoc);
+ auto name = circle::EnumNameBuiltinOperator(opcode);
+ return std::string(name);
+}
+
+} // namespace luci
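The CIRCLE_NODE / CIRCLE_VNODE pair above is an X-macro expansion over CircleNodes.lst. As an illustration only, assuming the list contains entries of the form CIRCLE_NODE(ADD, luci::CircleAdd) and CIRCLE_VNODE(CIRCLECONST, luci::CircleConst), the two visitors expand roughly to:

// In QueryOpCode: each CIRCLE_NODE entry becomes a visit() overload,
// while CIRCLE_VNODE entries expand to nothing.
BuiltinOperator visit(const luci::CircleAdd *) final { return BuiltinOperator_ADD; }

// In QueryCircleName the roles are swapped, so only virtual nodes get a name here:
const char *visit(const luci::CircleConst *) final { return "CIRCLECONST"; }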
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_CIRCLE_OP_CODE_H__
+#define __LUCI_PARTITION_CIRCLE_OP_CODE_H__
+
+#include <luci/IR/CircleNode.h>
+
+#include <string>
+
+namespace luci
+{
+
+std::string opcode_name(const CircleNode *node);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_CIRCLE_OP_CODE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleOpCode.h"
+
+// NOTE any node will do for testing
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+TEST(CircleOpCodeTest, name)
+{
+ auto g = loco::make_graph();
+ auto node = g->nodes()->create<luci::CircleSqrt>();
+
+ auto name = luci::opcode_name(node);
+ ASSERT_EQ(name, "SQRT");
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+void clone_connect(const luci::CircleNode *node, luci::CloneContext &clonecontext)
+{
+ ConnectNode cn(clonecontext);
+ node->accept(&cn);
+}
+
+luci::CircleNode *ConnectNode::find_clone(const luci::CircleNode *node)
+{
+ auto it = _clonecontext.find(node);
+ if (it == _clonecontext.end())
+ throw oops::UserExn("Invalid node in ConnectNode");
+ return it->second;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_CONNECT_NODE_H__
+#define __LUCI_PARTITION_CONNECT_NODE_H__
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <map>
+
+namespace luci
+{
+
+/**
+ * @note MapNode2Clone maps an original node to its cloned node and is used
+ *       to find the inputs of a cloned node
+ *
+ * (Original) (Clone)
+ *
+ * [A] [A']
+ * | [B] | [B']
+ * | | | |
+ * \ / \ /
+ * [C] [C']
+ *
+ * From the view of [C'] we need to find [A'] and [B']. Since we know [C] from
+ * [C'], we can take the inputs of [C], namely [A] and [B], and then look up
+ * [A]->[A'] and [B]->[B'] in the map (a short usage sketch follows this header).
+ */
+using MapNode2Clone = std::map<const CircleNode * /* ORG */, CircleNode * /* CLONE */>;
+
+struct CloneContext
+{
+ std::pair<MapNode2Clone::iterator, bool> emplace(const CircleNode *org, CircleNode *clone)
+ {
+ return node2clone.emplace(org, clone);
+ }
+ MapNode2Clone::iterator find(const CircleNode *org) { return node2clone.find(org); }
+ MapNode2Clone::iterator end(void) { return node2clone.end(); }
+
+ MapNode2Clone node2clone;
+};
+
+class ConnectNode final : public luci::CircleNodeVisitor<void>
+{
+public:
+  ConnectNode(luci::CloneContext &clonecontext) : _clonecontext(clonecontext) {}
+
+public:
+ // void visit(const luci::CircleAbs *) final;
+ void visit(const luci::CircleAdd *) final;
+ // void visit(const luci::CircleAddN *) final;
+ // void visit(const luci::CircleArgMax *) final;
+ // void visit(const luci::CircleArgMin *) final;
+ // void visit(const luci::CircleAveragePool2D *) final;
+ // void visit(const luci::CircleBatchMatMul *) final;
+ // void visit(const luci::CircleBatchToSpaceND *) final;
+ // void visit(const luci::CircleCast *) final;
+ // void visit(const luci::CircleCeil *) final;
+ // void visit(const luci::CircleConcatenation *) final;
+ void visit(const luci::CircleConst *) final;
+ // void visit(const luci::CircleConv2D *) final;
+ // void visit(const luci::CircleCos *) final;
+ // void visit(const luci::CircleCustom *) final;
+ // void visit(const luci::CircleDepthToSpace *) final;
+ // void visit(const luci::CircleDepthwiseConv2D *) final;
+ // void visit(const luci::CircleDequantize *) final;
+ void visit(const luci::CircleDiv *) final;
+ // void visit(const luci::CircleElu *) final;
+ // void visit(const luci::CircleEqual *) final;
+ // void visit(const luci::CircleExp *) final;
+ // void visit(const luci::CircleExpandDims *) final;
+ // void visit(const luci::CircleFakeQuant *) final;
+ // void visit(const luci::CircleFill *) final;
+ // void visit(const luci::CircleFloor *) final;
+ // void visit(const luci::CircleFloorDiv *) final;
+ // void visit(const luci::CircleFloorMod *) final;
+ // void visit(const luci::CircleFullyConnected *) final;
+ // void visit(const luci::CircleGather *) final;
+ // void visit(const luci::CircleGatherNd *) final;
+ // void visit(const luci::CircleGreater *) final;
+ // void visit(const luci::CircleGreaterEqual *) final;
+ // void visit(const luci::CircleIf *) final;
+ // void visit(const luci::CircleL2Normalize *) final;
+ // void visit(const luci::CircleL2Pool2D *) final;
+ // void visit(const luci::CircleLeakyRelu *) final;
+ // void visit(const luci::CircleLess *) final;
+ // void visit(const luci::CircleLessEqual *) final;
+ // void visit(const luci::CircleLocalResponseNormalization *) final;
+ // void visit(const luci::CircleLog *) final;
+ // void visit(const luci::CircleLogicalAnd *) final;
+ // void visit(const luci::CircleLogicalNot *) final;
+ // void visit(const luci::CircleLogicalOr *) final;
+ // void visit(const luci::CircleLogistic *) final;
+ // void visit(const luci::CircleLogSoftmax *) final;
+ // void visit(const luci::CircleMatrixDiag *) final;
+ // void visit(const luci::CircleMatrixSetDiag *) final;
+ // void visit(const luci::CircleMaximum *) final;
+ // void visit(const luci::CircleMaxPool2D *) final;
+ void visit(const luci::CircleMean *) final;
+ // void visit(const luci::CircleMinimum *) final;
+ // void visit(const luci::CircleMirrorPad *) final;
+ void visit(const luci::CircleMul *) final;
+ // void visit(const luci::CircleNeg *) final;
+ // void visit(const luci::CircleNonMaxSuppressionV4 *) final;
+ // void visit(const luci::CircleNonMaxSuppressionV5 *) final;
+ // void visit(const luci::CircleNotEqual *) final;
+ // void visit(const luci::CircleOneHot *) final;
+ // void visit(const luci::CirclePack *) final;
+ // void visit(const luci::CirclePad *) final;
+ // void visit(const luci::CirclePadV2 *) final;
+ void visit(const luci::CirclePow *) final;
+ // void visit(const luci::CirclePRelu *) final;
+ // void visit(const luci::CircleRange *) final;
+ // void visit(const luci::CircleRank *) final;
+ // void visit(const luci::CircleReduceAny *) final;
+ // void visit(const luci::CircleReduceMax *) final;
+ // void visit(const luci::CircleReduceMin *) final;
+ // void visit(const luci::CircleReduceProd *) final;
+ // void visit(const luci::CircleRelu *) final;
+ // void visit(const luci::CircleRelu6 *) final;
+ // void visit(const luci::CircleReluN1To1 *) final;
+ // void visit(const luci::CircleReshape *) final;
+ // void visit(const luci::CircleResizeBilinear *) final;
+ // void visit(const luci::CircleResizeNearestNeighbor *) final;
+ // void visit(const luci::CircleReverseSequence *) final;
+ // void visit(const luci::CircleReverseV2 *) final;
+ // void visit(const luci::CircleRound *) final;
+ void visit(const luci::CircleRsqrt *) final;
+ // void visit(const luci::CircleScatterNd *) final;
+ // void visit(const luci::CircleSegmentSum *) final;
+ // void visit(const luci::CircleSelect *) final;
+ // void visit(const luci::CircleSelectV2 *) final;
+ // void visit(const luci::CircleShape *) final;
+ // void visit(const luci::CircleSin *) final;
+ // void visit(const luci::CircleSlice *) final;
+ // void visit(const luci::CircleSoftmax *) final;
+ // void visit(const luci::CircleSpaceToBatchND *) final;
+ // void visit(const luci::CircleSpaceToDepth *) final;
+ // void visit(const luci::CircleSparseToDense *) final;
+ // void visit(const luci::CircleSplit *) final;
+ // void visit(const luci::CircleSplitV *) final;
+ void visit(const luci::CircleSqrt *) final;
+ // void visit(const luci::CircleSquare *) final;
+ void visit(const luci::CircleSquaredDifference *) final;
+ // void visit(const luci::CircleSqueeze *) final;
+ // void visit(const luci::CircleStridedSlice *) final;
+ void visit(const luci::CircleSub *) final;
+ // void visit(const luci::CircleSum *) final;
+ // void visit(const luci::CircleTanh *) final;
+ // void visit(const luci::CircleTile *) final;
+ // void visit(const luci::CircleTopKV2 *) final;
+ // void visit(const luci::CircleTranspose *) final;
+ // void visit(const luci::CircleTransposeConv *) final;
+ // void visit(const luci::CircleUnidirectionalSequenceLSTM *) final;
+ // void visit(const luci::CircleUnique *) final;
+ // void visit(const luci::CircleUnpack *) final;
+ // void visit(const luci::CircleWhere *) final;
+ // void visit(const luci::CircleWhile *) final;
+ // void visit(const luci::CircleZerosLike *) final;
+
+ // Circle Only
+ // void visit(const luci::CircleBCQFullyConnected *) final;
+ // void visit(const luci::CircleBCQGather *) final;
+ // void visit(const luci::CircleInstanceNorm *) final;
+
+ // Virtual
+ // void visit(const luci::CircleCustomOut *) final;
+ // void visit(const luci::CircleIfOut *) final;
+ // void visit(const luci::CircleInput *) final;
+ // void visit(const luci::CircleNonMaxSuppressionV4Out *) final;
+ // void visit(const luci::CircleNonMaxSuppressionV5Out *) final;
+ // void visit(const luci::CircleOutput *) final;
+ // void visit(const luci::CircleOutputDummy *) final;
+ // void visit(const luci::CircleOutputExclude *) final;
+ // void visit(const luci::CircleSplitOut *) final;
+ // void visit(const luci::CircleSplitVOut *) final;
+ // void visit(const luci::CircleTopKV2Out *) final;
+ // void visit(const luci::CircleUniqueOut *) final;
+ // void visit(const luci::CircleUnpackOut *) final;
+ // void visit(const luci::CircleWhileOut *) final;
+
+public:
+ luci::CircleNode *find_clone(const luci::CircleNode *node);
+
+protected:
+ luci::CloneContext &_clonecontext;
+};
+
+/**
+ * @brief Connect cloned node from input node
+ */
+void clone_connect(const luci::CircleNode *node, luci::CloneContext &clonecontext);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_CONNECT_NODE_H__
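For orientation, a minimal sketch of the flow described in the @note above, with org_a/org_b/org_c and their clones as hypothetical placeholders; the ConnectNode tests later in this change are the authoritative usage.

// Sketch: after cloning, record each original->clone pair, then wire inputs.
luci::CloneContext ctx;
ctx.emplace(org_a, clone_a); // [A] -> [A']
ctx.emplace(org_b, clone_b); // [B] -> [B']
ctx.emplace(org_c, clone_c); // [C] -> [C']

// Visits org_c ([C]) and re-points clone_c's ([C']) inputs to [A'] and [B'].
luci::clone_connect(org_c, ctx);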
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.test.h"
+
+// This file validates "ConnectNode.test.h". Please DO NOT remove this file.
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONNECT_NODE_TEST_H__
+#define __CONNECT_NODE_TEST_H__
+
+#include "ConnectNode.h"
+
+#include <luci/Service/CircleNodeClone.h>
+#include <luci/test/TestIOGraph.h>
+
+#include <loco/IR/Graph.h>
+
+#include <cassert>
+#include <initializer_list>
+#include <memory>
+#include <stdexcept>
+#include <vector>
+
+namespace luci
+{
+namespace test
+{
+
+template <unsigned N> class TestIsOGraph : public TestIsGraphlet<N>, public TestOGraphlet
+{
+public:
+ TestIsOGraph() = default;
+
+public:
+ virtual void init(const std::initializer_list<ShapeU32> shape_in, const ShapeU32 shape_out)
+ {
+ if (shape_in.size() != N)
+ throw std::runtime_error("Failed to init TestIsOGraph");
+
+ TestIsGraphlet<N>::init(TestIsGraphlet<N>::g(), shape_in);
+ TestOGraphlet::init(TestIsGraphlet<N>::g(), shape_out);
+ }
+};
+
+template <class T> class NodeGraphletT
+{
+public:
+ virtual void init(loco::Graph *g)
+ {
+ _node = g->nodes()->create<T>();
+ _node->dtype(loco::DataType::S32);
+ _node->name("node");
+ }
+
+ T *node(void) const { return _node; }
+
+protected:
+ T *_node{nullptr};
+};
+
+template <class T> class NodeIsGraphletT
+{
+public:
+ virtual void init(loco::Graph *g, uint32_t n)
+ {
+ _node = g->nodes()->create<T>(n);
+ _node->dtype(loco::DataType::S32);
+ _node->name("node");
+ }
+
+ T *node(void) const { return _node; }
+
+protected:
+ T *_node{nullptr};
+};
+
+/**
+ * @brief ConnectionTestHelper provides a common framework for testing
+ *        cloned CircleNode connections
+ */
+class ConnectionTestHelper
+{
+public:
+ ConnectionTestHelper() { _graph_clone = loco::make_graph(); }
+
+public:
+ template <unsigned N> void prepare_inputs(TestIsOGraph<N> *isograph)
+ {
+ assert(N == isograph->num_inputs());
+
+ for (uint32_t i = 0; i < N; ++i)
+ {
+ auto *input = _graph_clone->nodes()->create<luci::CircleInput>();
+ luci::copy_common_attributes(isograph->input(i), input);
+ _clonectx.emplace(isograph->input(i), input);
+ _inputs.push_back(input);
+ }
+ }
+
+ /**
+ * @note prepare_inputs_miss is for negative testing
+ */
+ template <unsigned N> void prepare_inputs_miss(TestIsOGraph<N> *isograph)
+ {
+ assert(N == isograph->num_inputs());
+
+ for (uint32_t i = 0; i < N; ++i)
+ {
+ auto *input = _graph_clone->nodes()->create<luci::CircleInput>();
+ luci::copy_common_attributes(isograph->input(i), input);
+ if (i != 0)
+ _clonectx.emplace(isograph->input(i), input);
+ _inputs.push_back(input);
+ }
+ }
+
+ void clone_connect(luci::CircleNode *node, luci::CircleNode *clone)
+ {
+ _clonectx.emplace(node, clone);
+
+ luci::clone_connect(node, _clonectx);
+ }
+
+public:
+ loco::Graph *graph_clone(void) { return _graph_clone.get(); }
+
+ luci::CircleNode *inputs(uint32_t idx) { return _inputs.at(idx); }
+
+protected:
+ luci::CloneContext _clonectx;
+ std::vector<luci::CircleInput *> _inputs;
+ std::unique_ptr<loco::Graph> _graph_clone; // graph for clones
+};
+
+} // namespace test
+} // namespace luci
+
+#endif // __CONNECT_NODE_TEST_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleAdd *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleAdd *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleAdd *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleAdd>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleAdd>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Add)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAdd *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAdd *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Add_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAdd *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAdd *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleConst *)
+{
+ // Nothing to do
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleDiv *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleDiv *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleDiv *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleDiv>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleDiv>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Div)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDiv *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDiv *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Div_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDiv *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDiv *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleMean *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleMean *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *reduction_indices =
+ loco::must_cast<luci::CircleNode *>(node->reduction_indices());
+
+ cloned->input(cn->find_clone(input));
+ cloned->reduction_indices(cn->find_clone(reduction_indices));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleMean *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleMul *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleMul *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleMul *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleMul>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+  void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleMul>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Mul)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMul *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMul *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Mul_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMul *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMul *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CirclePow *node)
+{
+ auto *cloned = loco::must_cast<luci::CirclePow *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CirclePow *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleRsqrt *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleRsqrt *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleRsqrt *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSqrt *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSqrt *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSqrt *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSquaredDifference *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSquaredDifference *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSquaredDifference *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSub *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSub *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSub *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSub>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ NodeGraphletT<luci::CircleSub>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Sub)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSub *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSub *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Sub_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSub *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSub *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionIR.h"
+#include "PartitionIRDump.h"
+#include "PartitionPGroups.h"
+#include "PartitionMerge.h"
+#include "PartitionCleanup.h"
+#include "PartitionPModules.h"
+#include "PartitionPModulesDump.h"
+
+#include "luci/Partition.h"
+#include "luci/Log.h"
+
+#include <cassert>
+
+namespace luci
+{
+
+/**
+ * @brief This will return a Partitioned Modules (PartedModules) object
+ */
+PartedModules apply(Module *source, const PartitionTable &partition)
+{
+ assert(source != nullptr);
+
+ LOGGER(l);
+
+ auto pgroups = produce_pgroups(source, partition);
+ INFO(l) << "--- Partition Graph (1)------------------------";
+ INFO(l) << pgroups.get();
+
+ auto mpgroups = merge_pgroups(pgroups.get());
+ INFO(l) << "--- Partition Graph (2)------------------------";
+ INFO(l) << mpgroups.get();
+
+ remove_unused_inputoutputs(mpgroups.get(), source);
+ INFO(l) << "--- Partition Graph (3)------------------------";
+ INFO(l) << mpgroups.get();
+
+ auto pmodules = produce_pmodules(mpgroups.get());
+ INFO(l) << "--- Modules -----------------------------------";
+ INFO(l) << &pmodules;
+
+ return pmodules;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Partition.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->dtype(loco::DataType::S32);
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class SqrtGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ SqrtGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ SqrtGraphlet::init(g(), shape);
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+ }
+};
+
+} // namespace
+
+TEST(PartitionTest, simple_apply)
+{
+ luci::Module module;
+
+ SqrtGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ luci::PartitionTable pt;
+ pt.default_group = "A";
+
+ auto pms = apply(&module, pt);
+
+ ASSERT_EQ(1, pms.pmodules.size());
+
+ auto &pm = *pms.pmodules.begin();
+ ASSERT_NE(nullptr, pm.module->graph());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionCleanup.h"
+
+#include "luci/Log.h"
+
+namespace
+{
+
+using CircleNodes = std::vector<luci::CircleNode *>;
+
+/**
+ * @note Outputs of the original source graph are treated as used outputs
+ */
+void gather_graph_outputs(CircleNodes &nodes, const luci::Module *source)
+{
+ // graph outputs are treated as used
+ auto graph = source->graph();
+ for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
+ {
+ auto output = luci::output_node(graph, n); // output is CircleOutput
+ assert(output != nullptr);
+
+ auto node = loco::must_cast<luci::CircleNode *>(output->from());
+
+ nodes.push_back(node);
+ }
+
+ // TODO add unused virtual outputs
+}
+
+/**
+ * @note If one PGroup requires an input, that input should be an output
+ * from another PGroup
+ */
+void gather_pgroups_outputs(CircleNodes &nodes, const luci::PGroups *pgroups)
+{
+ // input of a pgroup is used output
+ for (auto &pgroup : pgroups->pgroups)
+ {
+ for (auto input : pgroup->inputs)
+ {
+ nodes.push_back(input);
+ }
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+void remove_unused_inputoutputs(luci::PGroups *pgroups, const luci::Module *source)
+{
+ assert(source != nullptr);
+ assert(pgroups != nullptr);
+
+ LOGGER(l);
+
+ // TODO support multiple subgraph
+ assert(source->size() == 1);
+
+ INFO(l) << "--- Cleanup unused inputs/outputs";
+
+ // remove input within same pgroup
+ for (auto &pgroup : pgroups->pgroups)
+ {
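+    // NOTE erasing from pgroup->inputs invalidates the iterator; each removal
+    //      breaks out of the scan and the do/while restarts it until a full
+    //      pass makes no change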
+ bool changed;
+ do
+ {
+ changed = false;
+ for (auto it = pgroup->inputs.begin(); it != pgroup->inputs.end(); ++it)
+ {
+ auto input = *it;
+ if (pgroups->pgroup_of(input) == pgroup.get())
+ {
+ INFO(l) << " Cleanup input " << input->name() << " from group " << pgroup->group;
+ pgroup->inputs.erase(it);
+ changed = true;
+ break;
+ }
+        // NOTE CircleConst is one of the input types, as it is registered as
+        // an input to some node and then (should be) merged.
+        // Remove this input if it is a CircleConst
+ if (dynamic_cast<CircleConst *>(input) != nullptr)
+ {
+ INFO(l) << " Cleanup CircleConst " << input->name() << " from group " << pgroup->group;
+ pgroup->inputs.erase(it);
+ changed = true;
+ break;
+ }
+ }
+ } while (changed);
+ }
+
+ // remove unused output(s)
+ // 'used_outputs' will hold actual used outputs for all PGroups
+ CircleNodes used_outputs;
+
+ gather_graph_outputs(used_outputs, source);
+ gather_pgroups_outputs(used_outputs, pgroups);
+
+ for (auto &pgroup : pgroups->pgroups)
+ {
+ bool changed;
+ do
+ {
+ changed = false;
+ for (auto it = pgroup->outputs.begin(); it != pgroup->outputs.end(); ++it)
+ {
+ auto output = *it;
+ auto oit = std::find(used_outputs.begin(), used_outputs.end(), output);
+ if (oit == used_outputs.end())
+ {
+ INFO(l) << " Cleanup output " << output->name() << " from group " << pgroup->group;
+ pgroup->outputs.erase(it);
+ changed = true;
+ break;
+ }
+ }
+ } while (changed);
+ }
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITON_CLEANUP_H__
+#define __LUCI_PARTITON_CLEANUP_H__
+
+#include "PartitionIR.h"
+
+#include <luci/IR/Module.h>
+
+namespace luci
+{
+
+/**
+ * @brief This will remove unused inputs/outputs in each pgroup of pgroups
+ */
+void remove_unused_inputoutputs(luci::PGroups *, const luci::Module *);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITON_CLEANUP_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionIR.h"
+#include "CircleOpCode.h"
+
+#include "luci/Log.h"
+
+#include <cassert>
+#include <ostream>
+#include <iostream>
+
+namespace luci
+{
+
+std::unique_ptr<PGroups> PGroups::make_copy(void) const
+{
+ auto d_pgroups = std::make_unique<luci::PGroups>();
+
+ for (auto &s_pgroup : pgroups)
+ {
+ // make a copy of s_pgroup to d_pgroup
+ std::unique_ptr<luci::PGroup> d_pgroup = std::make_unique<luci::PGroup>();
+
+ d_pgroup->group = s_pgroup->group;
+ d_pgroup->id = s_pgroup->id;
+
+ for (auto &pnode : s_pgroup->pnodes)
+ {
+ auto pnodec = std::make_unique<luci::PNode>();
+ pnodec->node = pnode->node;
+ pnodec->group = pnode->group;
+ pnodec->pgroup = d_pgroup.get();
+ d_pgroup->pnodes.push_back(std::move(pnodec));
+ }
+
+ for (auto &input : s_pgroup->inputs)
+ d_pgroup->inputs.push_back(input);
+
+ for (auto &output : s_pgroup->outputs)
+ d_pgroup->outputs.push_back(output);
+
+ // copy node2group
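+    // NOTE the whole map is copied once per source pgroup; the entries are the
+    //      same each time, so the repetition is redundant but harmless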
+ for (auto it = node2group.begin(); it != node2group.end(); ++it)
+ d_pgroups->node2group[it->first] = it->second;
+
+ // build id2pgroup
+ d_pgroups->id2pgroup[d_pgroup->id] = d_pgroup.get();
+
+ d_pgroups->pgroups.push_back(std::move(d_pgroup));
+ // note: d_pgroup is now nullptr as it's moved
+ }
+
+ return std::move(d_pgroups);
+}
+
+std::string PGroups::group_of(luci::CircleNode *node) const
+{
+ assert(node != nullptr);
+
+ LOGGER(l);
+
+ auto it = node2group.find(node);
+ if (it == node2group.end())
+ {
+ INFO(l) << "PGroups::group_of " << node << "(" << node->name() << ") not found" << std::endl;
+ return "";
+ }
+ return it->second;
+}
+
+const PGroup *PGroups::pgroup_of(luci::CircleNode *node) const
+{
+ assert(node != nullptr);
+
+ for (auto &pgroup : pgroups)
+ {
+ for (auto &pnode : pgroup->pnodes)
+ {
+ if (node == pnode->node)
+ return pgroup.get();
+ }
+ }
+  // node may be a graph input (CircleInput)
+ return nullptr;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_IR_H__
+#define __LUCI_PARTITION_IR_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace luci
+{
+
+struct PGroup;
+
+/**
+ * @brief Partition Node with CircleNode with group name
+ * @note node just points to source luci::CircleNode, NOT the cloned node
+ * CloneContext is used to find cloned node from source node
+ */
+struct PNode
+{
+ const luci::CircleNode *node = nullptr;
+ std::string group;
+
+ const PGroup *pgroup = nullptr;
+};
+
+/**
+ * @brief Partition Group with Partition Nodes of same group and I/Os nodes
+ */
+struct PGroup
+{
+ std::vector<std::unique_ptr<PNode>> pnodes;
+ std::string group;
+ uint32_t id = 0;
+
+ // I/O while partitioning
+ std::vector<luci::CircleNode *> inputs;
+ std::vector<luci::CircleNode *> outputs;
+};
+
+struct PGroups
+{
+ std::vector<std::unique_ptr<PGroup>> pgroups;
+
+ // node2group is to find group key from source node
+ std::map<const luci::CircleNode *, std::string> node2group;
+
+  // id2pgroup is to find *pgroup from pgroup id
+ std::map<uint32_t, PGroup *> id2pgroup;
+
+ // default group key for reference
+ std::string default_group;
+
+public:
+ /**
+ * @brief return a copy of PGroups
+ */
+ std::unique_ptr<PGroups> make_copy(void) const;
+
+ /**
+ * @brief return group key of node, empty string if not found
+ */
+ std::string group_of(luci::CircleNode *node) const;
+
+ /**
+ * @brief return holding pgroup of node, nullptr if not found
+ */
+ const PGroup *pgroup_of(luci::CircleNode *node) const;
+};
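+
+// Ownership summary: PGroups owns its PGroup objects and each PGroup owns its
+// PNode objects; node2group and id2pgroup are non-owning lookup tables, and
+// PNode::node points to the source CircleNode without owning it.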
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_IR_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionIR.h"
+
+// NOTE any node will do for testing
+#include <luci/IR/Nodes/CircleAdd.h>
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+TEST(PartitionIRTest, PNode_ctor)
+{
+ auto g = loco::make_graph();
+ auto node = g->nodes()->create<luci::CircleAdd>();
+
+ luci::PNode pnode;
+ pnode.node = node;
+
+ ASSERT_NE(nullptr, pnode.node);
+ ASSERT_EQ(nullptr, pnode.pgroup);
+}
+
+// TODO add more tests with luci::PNode
+
+TEST(PartitionIRTest, PGroup_ctor)
+{
+ auto g = loco::make_graph();
+ auto node = g->nodes()->create<luci::CircleAdd>();
+
+ luci::PGroup pgroup;
+ auto pnode = std::make_unique<luci::PNode>();
+ pnode->node = node;
+
+ pgroup.pnodes.push_back(std::move(pnode));
+
+ ASSERT_NE(pgroup.pnodes.end(), pgroup.pnodes.begin());
+ ASSERT_EQ(0, pgroup.inputs.size());
+ ASSERT_EQ(0, pgroup.outputs.size());
+}
+
+// TODO add more tests with luci::PGroup
+
+TEST(PartitionIRTest, PGroups_ctor)
+{
+ auto g = loco::make_graph();
+ auto node = g->nodes()->create<luci::CircleAdd>();
+
+ auto pnode = std::make_unique<luci::PNode>();
+ pnode->node = node;
+
+ auto pgroup = std::make_unique<luci::PGroup>();
+ pgroup->pnodes.push_back(std::move(pnode));
+
+ luci::PGroups pgroups;
+ pgroups.pgroups.push_back(std::move(pgroup));
+
+ ASSERT_NE(pgroups.pgroups.end(), pgroups.pgroups.begin());
+}
+
+// TODO add more tests with luci::PGroups
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionIRDump.h"
+
+#include "CircleOpCode.h"
+
+#include <iostream>
+
+namespace luci
+{
+
+void dump(std::ostream &os, const PNode *pnode)
+{
+ os << "PNode: " << pnode->group << ", " << pnode->node << ":" << luci::opcode_name(pnode->node)
+ << ":" << pnode->node->name() << std::endl;
+}
+
+void dump(std::ostream &os, const PGroup *pgroup)
+{
+ os << "--- PGroup: " << pgroup->group << std::endl;
+ os << "Input(s): ";
+ for (auto &node_in : pgroup->inputs)
+ os << node_in->name() << " ";
+ os << std::endl;
+ for (auto &pnode : pgroup->pnodes)
+ {
+ dump(os, pnode.get());
+ }
+ os << "Output(s): ";
+ for (auto &node_out : pgroup->outputs)
+ os << node_out->name() << " ";
+ os << std::endl;
+}
+
+void dump(std::ostream &os, const PGroups *pgroups)
+{
+ for (auto &pgroup : pgroups->pgroups)
+ {
+ dump(os, pgroup.get());
+ }
+ os << "--- Node2Group items: " << std::endl;
+ for (auto it = pgroups->node2group.begin(); it != pgroups->node2group.end(); ++it)
+ {
+ auto node = it->first;
+ auto group = it->second;
+ os << " Node: " << node << "(" << node->name() << "): " << group << std::endl;
+ }
+}
+
+} // namespace luci
+
+std::ostream &operator<<(std::ostream &os, const luci::PGroups *pgroups)
+{
+ luci::dump(os, pgroups);
+ return os;
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_IR_DUMP_H__
+#define __LUCI_PARTITION_IR_DUMP_H__
+
+#include "PartitionIR.h"
+
+#include <iostream>
+
+namespace luci
+{
+
+void dump(std::ostream &os, const PNode *pnode);
+void dump(std::ostream &os, const PGroup *pgroup);
+void dump(std::ostream &os, const PGroups *pgroups);
+
+} // namespace luci
+
+std::ostream &operator<<(std::ostream &os, const luci::PGroups *pgroups);
+
+#endif // __LUCI_PARTITION_IR_DUMP_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionMerge.h"
+
+#include <algorithm>
+
+namespace
+{
+
+/**
+ * @brief return true if pgroup_i output is one of the inputs of pgroup
+ */
+bool is_input_of(const luci::PGroup *pgroup_i, const luci::PGroup *pgroup)
+{
+ for (auto *output : pgroup_i->outputs)
+ {
+ for (auto *input : pgroup->inputs)
+ {
+ if (input == output)
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * @brief return true if there is only one input or all the inputs have same group
+ * @note pgroups is used to find group of pgroup
+ */
+bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups)
+{
+ assert(pgroups != nullptr);
+ assert(pgroup != nullptr);
+
+ const luci::PGroup *input_pgroup = nullptr;
+ std::string group;
+ for (auto &input : pgroup->inputs)
+ {
+ auto input_group = pgroups->group_of(input);
+    // NOTE: all the nodes should be registered and the returned group should be valid.
+    // produce_pgroups() should ensure this.
+    // assert here to find if there is any problem with this.
+ assert(not input_group.empty());
+ if (input_group.empty())
+ input_group = pgroups->default_group;
+
+ if (group.empty())
+ group = input_group;
+ else
+ {
+ if (group != input_group)
+ return false;
+ }
+ // if there are multiple inputs, all the inputs should be in same pgroup
+ // https://github.com/Samsung/ONE/issues/6230#issuecomment-801618150
+ // https://github.com/Samsung/ONE/issues/6230#issuecomment-801680531
+ auto pgroup_input = pgroups->pgroup_of(input);
+ if (pgroup_input != nullptr)
+ {
+ if (input_pgroup == nullptr)
+ input_pgroup = pgroup_input;
+ else
+ {
+ if (input_pgroup != pgroup_input)
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+/**
+ * @brief merge pgroup into pgroup_i
+ * @note output of pgroup_i should be input of pgroup
+ */
+void merge_into(luci::PGroup *pgroup, luci::PGroup *pgroup_i)
+{
+ for (auto &pnode : pgroup->pnodes)
+ {
+ // update pgroup for this pnode
+ pnode->pgroup = pgroup_i;
+ assert(pnode->group == pgroup_i->group);
+
+ // we don't need to add this in topological order:
+    // all the nodes will be created first, then connections will be made
+ pgroup_i->pnodes.push_back(std::move(pnode));
+ // note: pnode is now nullptr as it's moved into pgroup_i->pnodes
+ }
+
+ for (auto &input : pgroup->inputs)
+ {
+ // add inputs of pgroup to pgroup_i if not member of pgroup_i
+ bool found_in_pgroup_i = false;
+ for (auto &pnode : pgroup_i->pnodes)
+ {
+ if (input == pnode->node)
+ {
+ found_in_pgroup_i = true;
+ break;
+ }
+ }
+ // skip if this input is already in the inputs
+ auto fit = std::find(pgroup_i->inputs.begin(), pgroup_i->inputs.end(), input);
+ if (fit != pgroup_i->inputs.end())
+ {
+ found_in_pgroup_i = true;
+ }
+    // note: if we force found_in_pgroup_i to false for testing, there will be
+    // unnecessary inputs
+ if (not found_in_pgroup_i)
+ {
+      // this node input may be in another pgroup
+ pgroup_i->inputs.push_back(input);
+ }
+ }
+ // add outputs of pgroup to pgroup_i outputs if not exist
+ for (auto &output : pgroup->outputs)
+ {
+ auto it = std::find(pgroup_i->outputs.begin(), pgroup_i->outputs.end(), output);
+ if (it == pgroup_i->outputs.end())
+ {
+ pgroup_i->outputs.push_back(output);
+ }
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * @brief This will merge pgroups with same group values in topological order
+ */
+std::unique_ptr<luci::PGroups> merge_pgroups(const luci::PGroups *s_pgroups)
+{
+ // Make a copy of pgroups to apply merge action
+ // Q) do we really need a copy?
+ auto d_pgroups = s_pgroups->make_copy();
+
+ // Merge partition graphs
+  // - This is an initial implementation that works for limited networks
+  // - if A and B are in the same group and A is an input of B -> merge B into A
+ auto &pgroups = d_pgroups->pgroups;
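+  // NOTE a successful merge erases an element from 'pgroups' and invalidates
+  //      both loop iterators, so we break out of both loops and let the
+  //      do/while restart the scan until no more merge happens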
+ bool changed;
+ do
+ {
+ changed = false;
+ for (auto &pgroup_i : pgroups)
+ {
+ bool merged = false;
+ for (auto it = pgroups.begin(); it != pgroups.end(); ++it)
+ {
+ auto &pgroup = *it;
+
+ // skip if same object
+ if (pgroup->id == pgroup_i->id)
+ continue;
+ // skip if different group
+ if (pgroup->group != pgroup_i->group)
+ continue;
+ // skip if not connected
+ if (!is_input_of(pgroup_i.get(), pgroup.get()))
+ continue;
+ // skip if there are multiple inputs but inputs differ in group
+ if (!is_input_same(pgroup.get(), d_pgroups.get()))
+ continue;
+        // TODO add more conditions as needed
+
+ merge_into(pgroup.get(), pgroup_i.get());
+
+ auto eit = d_pgroups->id2pgroup.find(pgroup->id);
+ assert(eit != d_pgroups->id2pgroup.end());
+ d_pgroups->id2pgroup.erase(eit);
+
+ // remove merged pgroup from pgroups
+ pgroups.erase(it);
+
+ merged = true;
+ break;
+ }
+ if (merged)
+ {
+ changed = true;
+ break;
+ }
+ }
+ } while (changed);
+
+ return std::move(d_pgroups);
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITON_MERGE_H__
+#define __LUCI_PARTITON_MERGE_H__
+
+#include "PartitionIR.h"
+
+#include <memory>
+
+namespace luci
+{
+
+std::unique_ptr<luci::PGroups> merge_pgroups(const luci::PGroups *s_pgroups);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITON_MERGE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPGroups.h"
+#include "PartitionIR.h"
+#include "CircleOpCode.h"
+
+#include "luci/Partition.h"
+#include "luci/Log.h"
+#include "luci/LogHelper.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <loco.h>
+
+namespace
+{
+
+class IsVirtualNode final : public luci::CircleNodeVisitor<bool>
+{
+public:
+ bool visit(const luci::CircleInput *) final { return true; }
+ bool visit(const luci::CircleOutput *) final { return true; }
+ // TODO add all virtual nodes
+
+ // default is false
+ bool visit(const luci::CircleNode *) final { return false; }
+};
+
+bool check_allocate_partition(const luci::CircleNode *node)
+{
+ IsVirtualNode query;
+ if (node->accept(&query))
+ return false;
+ /**
+ * @note About CircleConst
+   * CircleConst acts like a part of some CircleNode, and managing a CircleConst
+   * used (referenced) multiple times is a bit difficult if it is used across
+   * different PGroups. So we treat it differently from other node types.
+ * https://github.com/Samsung/ONE/issues/6230#issuecomment-809802813
+ */
+ if (dynamic_cast<const luci::CircleConst *>(node) != nullptr)
+ return false;
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
+ const luci::PartitionTable &partition)
+{
+ assert(source != nullptr);
+ // TODO support multiple subgraphs
+ assert(source->size() == 1);
+
+ LOGGER(l);
+
+ auto pgroups = std::make_unique<luci::PGroups>();
+
+ pgroups->default_group = partition.default_group;
+
+ // Create a PGroup per CircleNode: each PGroup will have one CircleNode
+ auto graph = source->graph();
+ auto nodes = graph->nodes();
+ for (uint32_t idx = 0; idx < nodes->size(); ++idx)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(nodes->at(idx));
+
+    // check if node is a normal node that we are interested in
+ if (check_allocate_partition(node))
+ {
+ auto opcodename = luci::opcode_name(node);
+ assert(!opcodename.empty());
+
+ auto group = partition.default_group;
+ auto it = partition.byopcodes.find(opcodename);
+ if (it != partition.byopcodes.end())
+ group = it->second;
+
+ INFO(l) << "Op: " << node->name() << ": " << opcodename << ", " << node << ", " << group
+ << std::endl;
+
+ auto pgroup = std::make_unique<luci::PGroup>();
+ pgroup->group = group;
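+      // NOTE ids start from 1 (idx + 1) and are used as the key of id2pgroup below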
+ pgroup->id = idx + 1;
+
+ auto pnode = std::make_unique<luci::PNode>();
+ pnode->node = node;
+ pnode->group = group;
+ pnode->pgroup = pgroup.get();
+
+ pgroup->pnodes.push_back(std::move(pnode));
+
+ // Set input of PGroup
+ for (uint32_t in = 0; in < node->arity(); ++in)
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(node->arg(in));
+        // this input may be a CircleInput in the source graph
+ // --> not confident this is safe
+ pgroup->inputs.push_back(input);
+ }
+ // Set output of PGroup: node itself or multiple virtual outputs
+ // TODO support multiple virtual outputs
+ pgroup->outputs.push_back(node);
+
+ pgroups->node2group[node] = group;
+ pgroups->id2pgroup[pgroup->id] = pgroup.get();
+
+ pgroups->pgroups.push_back(std::move(pgroup));
+ }
+ else
+ {
+ INFO(l) << "Skip Op: " << node->name() << std::endl;
+ // record as default group
+ pgroups->node2group[node] = partition.default_group;
+ }
+ }
+
+ return std::move(pgroups);
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITON_PGROUPS_H__
+#define __LUCI_PARTITON_PGROUPS_H__
+
+#include "PartitionIR.h"
+
+#include "luci/Partition.h"
+
+#include <luci/IR/Module.h>
+
+namespace luci
+{
+
+/**
+ * @brief This will produce a PGroups from Module and PartitionTable.
+ * @note Each PGroup will hold one CircleNode with the partition key value as its group.
+ *       Supports only a single Graph in the Module for now.
+ */
+std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
+ const luci::PartitionTable &partition);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITON_PGROUPS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPGroups.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->dtype(loco::DataType::S32);
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class SqrtGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ SqrtGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ SqrtGraphlet::init(g(), shape);
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+ }
+};
+
+} // namespace
+
+TEST(PartitionPGroupsTest, simple_produce)
+{
+ luci::Module module;
+
+ SqrtGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ luci::PartitionTable pt;
+ pt.default_group = "A";
+
+ auto pgs = produce_pgroups(&module, pt);
+
+ ASSERT_EQ(1, pgs->pgroups.size());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPModules.h"
+#include "ConnectNode.h"
+
+#include "luci/Service/CircleNodeClone.h"
+#include "luci/Log.h"
+
+#include <loco.h>
+
+namespace
+{
+
+void add_graph_input(loco::Graph *graph, luci::CircleInput *input_node)
+{
+ assert(graph != nullptr);
+ assert(input_node != nullptr);
+
+ auto graph_input = graph->inputs()->create();
+ graph_input->name(input_node->name());
+
+ // Set GraphInputOutputIndex for graph
+ input_node->index(graph_input->index());
+
+ // Data type
+ graph_input->dtype(input_node->dtype());
+
+ // Shape of GraphInput
+ auto input_shape = std::make_unique<loco::TensorShape>();
+ input_shape->rank(input_node->rank());
+ for (uint32_t r = 0; r < input_node->rank(); ++r)
+ {
+ if (input_node->dim(r).known())
+ input_shape->dim(r).set(input_node->dim(r).value());
+ }
+ graph_input->shape(std::move(input_shape));
+}
+
+void add_graph_output(loco::Graph *graph, luci::CircleOutput *output_node)
+{
+ assert(graph != nullptr);
+ assert(output_node != nullptr);
+
+ auto graph_output = graph->outputs()->create();
+ graph_output->name(output_node->name());
+
+ // Set GraphInputOutputIndex for graph
+ output_node->index(graph_output->index());
+
+ // Data type
+ graph_output->dtype(output_node->dtype());
+
+ // Shape of GraphOutput
+ auto output_shape = std::make_unique<loco::TensorShape>();
+ output_shape->rank(output_node->rank());
+ for (uint32_t r = 0; r < output_node->rank(); ++r)
+ {
+ if (output_node->dim(r).known())
+ output_shape->dim(r).set(output_node->dim(r).value());
+ }
+ graph_output->shape(std::move(output_shape));
+}
+
+/**
+ * @brief Build the given loco::Graph 'graph' from the nodes of 'pgroup'
+ */
+void build_graph(loco::Graph *graph, const luci::PGroup *pgroup)
+{
+ LOGGER(l);
+
+ luci::CloneContext clonectx;
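+
+  // Build order: graph inputs, then CircleConst inputs, then node clones,
+  // then connections between the clones, and finally graph outputs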
+
+ // add input node(s)
+ for (auto *input : pgroup->inputs)
+ {
+ auto *input_clone = graph->nodes()->create<luci::CircleInput>();
+ luci::copy_common_attributes(input, input_clone);
+
+ add_graph_input(graph, input_clone);
+ clonectx.emplace(input, input_clone);
+
+ INFO(l) << "MAP: "
+ << " input(" << input << ") -> " << input_clone << "(" << input_clone->name() << ")";
+ }
+
+ // add CircleConst for inputs
+ for (auto &pnode : pgroup->pnodes)
+ {
+ auto node = pnode->node;
+ uint32_t arity = node->arity();
+ for (uint32_t a = 0; a < arity; ++a)
+ {
+ auto in_a_const = dynamic_cast<luci::CircleConst *>(node->arg(a));
+ if (in_a_const != nullptr)
+ {
+ auto it = clonectx.find(in_a_const);
+ if (it == clonectx.end())
+ {
+ auto *clone = clone_node(in_a_const, graph);
+ clonectx.emplace(in_a_const, clone);
+
+ INFO(l) << "MAP: "
+ << " const(" << in_a_const << ") -> " << clone << "(" << clone->name() << ")";
+ }
+ }
+ }
+ }
+
+ // add nodes
+ for (auto &pnode : pgroup->pnodes)
+ {
+ auto *clone = clone_node(pnode->node, graph);
+ clonectx.emplace(pnode->node, clone);
+
+ INFO(l) << "MAP: "
+ << " node(" << pnode->node << ") -> " << clone << "(" << clone->name() << ")";
+ }
+ // connect nodes
+ for (auto &pnode : pgroup->pnodes)
+ {
+ clone_connect(pnode->node, clonectx);
+ }
+
+ // add output node(s)
+ for (auto *output : pgroup->outputs)
+ {
+ auto *output_clone = graph->nodes()->create<luci::CircleOutput>();
+ luci::copy_common_attributes(output, output_clone);
+ // note: we don't add output_clone to clonectx.
+ // logically, output is not used as an input to any other nodes.
+
+ auto it = clonectx.find(output);
+ assert(it != clonectx.end());
+ output_clone->from(it->second);
+
+ add_graph_output(graph, output_clone);
+
+ INFO(l) << "MAP: "
+ << "output(" << output << ") -> " << output_clone << "(" << output_clone->name() << ")"
+ << ": from " << it->second << "(" << it->second->name() << ")";
+ }
+}
+
+std::string make_name(const luci::PGroup *pgroup)
+{
+ auto &first_pnode = *pgroup->pnodes.begin();
+ auto *first_node = first_pnode->node;
+ std::string name = first_node->graph()->name();
+ name = name + "_" + pgroup->group;
+ return name;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * @brief This will produce a list of luci::Module, wrapped as PartedModules, from pgroups
+ */
+luci::PartedModules produce_pmodules(const luci::PGroups *pgroups)
+{
+ LOGGER(l);
+
+ luci::PartedModules pms;
+
+ for (auto &pgroup : pgroups->pgroups)
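+  // one PartedModule, holding a Module with a single Graph, is produced per PGroup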
+ {
+ luci::PartedModule pm;
+ pm.module = std::make_unique<luci::Module>();
+ pm.group = pgroup->group;
+
+ auto graph = loco::make_graph();
+
+ auto graph_name = make_name(pgroup.get());
+ graph->name(graph_name);
+
+ INFO(l) << "--- Partition Graph build----------------------";
+ INFO(l) << "--- name: " << graph_name;
+ build_graph(graph.get(), pgroup.get());
+
+ pm.module->add(std::move(graph));
+ pms.pmodules.emplace_back(std::move(pm));
+ }
+
+ return pms;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITON_PMODULES_H__
+#define __LUCI_PARTITON_PMODULES_H__
+
+#include "PartitionIR.h"
+
+#include "luci/Partition.h"
+
+namespace luci
+{
+
+luci::PartedModules produce_pmodules(const luci::PGroups *pgroups);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITON_PMODULES_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPModules.h"
+#include "PartitionPGroups.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->dtype(loco::DataType::S32);
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class SqrtGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ SqrtGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ SqrtGraphlet::init(g(), shape);
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+ }
+};
+
+} // namespace
+
+TEST(PartitionPModulesTest, simple_convert)
+{
+ luci::Module module;
+
+ SqrtGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ luci::PartitionTable pt;
+ pt.default_group = "A";
+
+ auto pgs = produce_pgroups(&module, pt);
+ auto pms = produce_pmodules(pgs.get());
+
+ ASSERT_EQ(1, pms.pmodules.size());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPModulesDump.h"
+
+#include "luci/LogHelper.h"
+
+#include <iostream>
+
+namespace luci
+{
+
+void dump(std::ostream &os, const PartedModule *pmodule)
+{
+ os << "--- PartedModule: " << pmodule->group << std::endl;
+ os << luci::fmt(pmodule->module->graph());
+}
+
+void dump(std::ostream &os, const PartedModules *pmodules)
+{
+ for (auto &pmodule : pmodules->pmodules)
+ {
+ dump(os, &pmodule);
+ }
+ os << std::endl;
+}
+
+} // namespace luci
+
+std::ostream &operator<<(std::ostream &os, const luci::PartedModules *pmodules)
+{
+ luci::dump(os, pmodules);
+ return os;
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_PMODULES_DUMP_H__
+#define __LUCI_PARTITION_PMODULES_DUMP_H__
+
+#include "luci/Partition.h"
+
+#include <iostream>
+
+namespace luci
+{
+
+void dump(std::ostream &os, const PartedModule *pmodule);
+void dump(std::ostream &os, const PartedModules *pmodules);
+
+} // namespace luci
+
+std::ostream &operator<<(std::ostream &os, const luci::PartedModules *pmodules);
+
+#endif // __LUCI_PARTITION_PMODULES_DUMP_H__
target_link_libraries(luci_pass PRIVATE luci_log)
target_link_libraries(luci_pass PRIVATE luci_service)
target_link_libraries(luci_pass PRIVATE luci_logex)
+target_link_libraries(luci_pass PRIVATE luci_profile)
target_link_libraries(luci_pass PRIVATE nncc_common)
target_link_libraries(luci_pass PRIVATE oops)
install(TARGETS luci_pass DESTINATION lib)
target_include_directories(luci_pass_test PRIVATE src)
target_link_libraries(luci_pass_test luci_pass)
target_link_libraries(luci_pass_test luci_lang)
+target_link_libraries(luci_pass_test luci_testhelper)
#target_link_libraries(luci_pass_test oops)
enum Algorithm
{
FuseAddWithTConv,
+ FuseBatchNormWithConv,
+ FuseBatchNormWithDwConv,
FuseBatchNormWithTConv,
FuseBCQ,
FuseInstanceNorm,
QuantizeDequantizeWeights,
QuantizeWithMinMax,
Requantize,
+ FoldAddV2,
+ FoldCast,
FoldDequantize,
+ FoldSparseToDense,
+ ForwardReshapeToUnaryOp,
SparsifyTensorPass,
FusePreActivationBatchNorm,
MakeBatchNormGammaPositive,
RemoveRedundantTranspose,
ReplaceMulAddWithDepthwiseConv,
SubstitutePackToReshape,
+ SubstituteSqueezeToReshape,
+ ConvertNCHWToNHWC,
+ RemoveUnnecessarySlice,
+ RemoveUnnecessaryStridedSlice,
+ RemoveUnnecessarySplit,
+ RemoveUnnecessaryReshape,
+ TransformMinMaxToRelu6Pass,
+ SubstituteTransposeToReshape,
+ RemoveRedundantReshape,
};
enum AlgorithmParameters
Sparsify_format,
Sparsify_block_size,
Sparsify_block_map,
+
+ // convert NCHW to NHWC
+ NCHW_to_NHWC_preserve_input_shape,
+ NCHW_to_NHWC_preserve_output_shape,
};
virtual ~Options() = default;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_PASS_H__
+#define __LUCI_CIRCLE_SHAPE_INFERENCE_PASS_H__
+
+#include <loco.h>
+
+#include <luci/ModulePass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to infer shape of circle nodes
+ */
+class CircleShapeInferencePass : public luci::Pass
+{
+public:
+ virtual const char *name(void) const { return "luci::CircleShapeInferencePass"; }
+
+public:
+ bool run(luci::Module *m);
+ bool run(loco::Graph *graph);
+};
+
+} // namespace luci
+
+#endif //__LUCI_CIRCLE_SHAPE_INFERENCE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CONVERT_NCHW_TO_NHWC_PASS_H__
+#define __LUCI_CONVERT_NCHW_TO_NHWC_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to convert NCHW Ops to NHWC
+ *
+ * @details Find operators that use NCHW layout and make them use NHWC.
+ * Strictly speaking, it is impossible to distinguish whether
+ * an operator is using NCHW or NHWC without programmers' annotations.
+ * But we guess the data layout of each operator as much as possible
+ * based on the assumptions described in the comments.
+ * Note that this Pass does not change the execution result even
+ * for the false-positive cases.
+ */
+struct ConvertNCHWToNHWCPass final : public logo::Pass
+{
+public:
+ ConvertNCHWToNHWCPass(bool preserve_input, bool preserve_output)
+ : _preserve_input(preserve_input), _preserve_output(preserve_output)
+ {
+ // Do nothing
+ }
+
+ ConvertNCHWToNHWCPass() = delete;
+
+ virtual ~ConvertNCHWToNHWCPass() = default;
+
+ const char *name(void) const final { return "luci::ConvertNCHWToNHWCPass"; }
+
+ bool run(loco::Graph *g) final;
+
+private:
+ bool _preserve_input = false;
+ bool _preserve_output = false;
+};
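+
+// A minimal usage sketch (assumption: 'g' is a valid loco::Graph *):
+//
+//   luci::ConvertNCHWToNHWCPass pass(/*preserve_input=*/true, /*preserve_output=*/false);
+//   bool changed = pass.run(g); // true when any layout conversion was applied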
+
+} // namespace luci
+
+#endif // __LUCI_CONVERT_NCHW_TO_NHWC_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_ADD_V2_PASS_H__
+#define __LUCI_FOLD_ADD_V2_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold AddV2 to a constant tensor
+ *
+ */
+struct FoldAddV2Pass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldAddV2Pass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_ADD_V2_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_CAST_PASS_H__
+#define __LUCI_FOLD_CAST_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold Cast to a constant tensor
+ *
+ */
+struct FoldCastPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldCastPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_CAST_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_SPARSE_TO_DENSE_PASS_H__
+#define __LUCI_FOLD_SPARSE_TO_DENSE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold SparseToDense to a constant tensor
+ *
+ */
+struct FoldSparseToDensePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldSparseToDensePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_SPARSE_TO_DENSE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FORWARD_RESHAPE_TO_UNARYOP_PASS_H__
+#define __LUCI_FORWARD_RESHAPE_TO_UNARYOP_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to forward a Reshape across a following UnaryOp (the Reshape moves after it)
+ */
+struct ForwardReshapeToUnaryOpPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ForwardReshapeToUnaryOpPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FORWARD_RESHAPE_TO_UNARYOP_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_BATCH_NORM_WITH_CONV_PASS_H__
+#define __LUCI_FUSE_BATCH_NORM_WITH_CONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse Batch Normalization into CircleConv
+ */
+struct FuseBatchNormWithConvPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseBatchNormWithConvPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_BATCH_NORM_WITH_CONV_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_BATCH_NORM_WITH_DWCONV_PASS_H__
+#define __LUCI_FUSE_BATCH_NORM_WITH_DWCONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse Batch Normalization into CircleDepthWiseConv2D
+ */
+struct FuseBatchNormWithDwConvPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseBatchNormWithDwConvPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_BATCH_NORM_WITH_DWCONV_PASS_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
-#define __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
-
-#include <logo/Pass.h>
-
-namespace luci
-{
-
-/**
- * @brief Class to fuse Batch Normalization into CircleTransposeConv
- */
-struct FuseBatchNormWithTConvPass final : public logo::Pass
-{
- const char *name(void) const final { return "luci::FuseBatchNormWithTConvPass"; }
-
- bool run(loco::Graph *g) final;
-};
-
-} // namespace luci
-
-#endif // __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
+#define __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse Batch Normalization into CircleTransposeConv
+ */
+struct FuseBatchNormWithTConvPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseBatchNormWithTConvPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__
-#define __LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__
-
-#include <loco.h>
-
-#include <luci/ModulePass.h>
-
-namespace luci
-{
-
-/**
- * @brief Pass to copy shape/dtype of loco to circle node
- *
- * CAUTION : This pass will be removed after refactoring is finished
- */
-class MigrateLegacyShapeDtypePass : public luci::Pass
-{
-public:
- virtual const char *name(void) const { return "luci::MigrateLegacyShapeDtypePass"; }
-
-public:
- bool run(luci::Module *m);
- bool run(loco::Graph *graph);
-};
-
-} // namespace luci
-
-#endif //__LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__
public:
QuantizeDequantizeWeightsPass(loco::DataType input_dtype, loco::DataType output_dtype,
QuantizationGranularity granularity)
- : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity}
+ : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity}
{
// DO NOTHING
}
public:
QuantizeWithMinMaxPass(loco::DataType input_dtype, loco::DataType output_dtype,
QuantizationGranularity granularity)
- : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity}
+ : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity}
{
// DO NOTHING
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_REDUNDANT_RESHAPE_PASS_H__
+#define __LUCI_REMOVE_REDUNDANT_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to collapse redundant Reshape nodes into a single Reshape node.
+ * @details This class replaces two consecutive Reshape nodes with a single Reshape node.
+ *          As a Reshape operation only changes the shape, not the buffer, the former Reshape is unnecessary.
+ */
+struct RemoveRedundantReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveRedundantReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_REDUNDANT_RESHAPE_PASS_H__
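A minimal sketch of how the redundancy described above can be detected: a Reshape whose producer is itself a Reshape. This is illustrative only; CircleReshape::tensor() is assumed to be the data-input accessor, and the replacement/rewiring logic of the real pass is not shown.

bool is_reshape_of_reshape(luci::CircleReshape *reshape)
{
  // The outer Reshape already fixes the final shape, so an inner Reshape that only
  // re-labels the same buffer contributes nothing and can be bypassed.
  return dynamic_cast<luci::CircleReshape *>(reshape->tensor()) != nullptr;
}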
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_UNNECESSARY_RESHAPE_PASS_H__
+#define __LUCI_REMOVE_UNNECESSARY_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove an unnecessary Reshape node whose input and output shapes are identical.
+ */
+struct RemoveUnnecessaryReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessaryReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_UNNECESSARY_RESHAPE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_NO_EFFECT_SLICE_PASS_H__
+#define __LUCI_REMOVE_NO_EFFECT_SLICE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove an unnecessary Slice node whose input and output are identical.
+ */
+struct RemoveUnnecessarySlicePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessarySlicePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_NO_EFFECT_SLICE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_UNNECESSARY_SPLIT_PASS_H__
+#define __LUCI_REMOVE_UNNECESSARY_SPLIT_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove an unnecessary Split op
+ */
+struct RemoveUnnecessarySplitPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessarySplitPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_UNNECESSARY_SPLIT_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_UNNECESSARY_STRIDED_SLICE_PASS_H__
+#define __LUCI_REMOVE_UNNECESSARY_STRIDED_SLICE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove an unnecessary StridedSlice node whose input and output are identical.
+ */
+struct RemoveUnnecessaryStridedSlicePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessaryStridedSlicePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_UNNECESSARY_STRIDED_SLICE_PASS_H__
{
public:
RequantizePass(loco::DataType input_dtype, loco::DataType output_dtype)
- : _input_dtype{input_dtype}, _output_dtype{output_dtype}
+ : _input_dtype{input_dtype}, _output_dtype{output_dtype}
{
// DO NOTHING
}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_SHAPE_INFERENCE_PASS_H__
-#define __LUCI_SHAPE_INFERENCE_PASS_H__
-
-#include <loco.h>
-
-#include <luci/ModulePass.h>
-
-namespace luci
-{
-
-/**
- * @brief Pass to infer shape of nodes
- */
-class ShapeInferencePass : public luci::Pass
-{
-public:
- virtual const char *name(void) const { return "luci::ShapeInferencePass"; }
-
-public:
- bool run(luci::Module *m);
- bool run(loco::Graph *graph);
-};
-
-} // namespace luci
-
-#endif //__LUCI_SHAPE_INFERENCE_PASS_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__
-#define __LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__
-
-#include <loco.h>
-
-#include <luci/ModulePass.h>
-
-namespace luci
-{
-
-/**
- * @brief Pass to infer shape_signature of nodes
- */
-class ShapeSignatureInferencePass : public luci::Pass
-{
-public:
- virtual const char *name(void) const { return "luci::ShapeSignatureInferencePass"; }
-
-public:
- bool run(luci::Module *m);
- bool run(loco::Graph *graph);
-};
-
-} // namespace luci
-
-#endif //__LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__
SparsifyTensorPass(const std::string &tensor_name, const std::vector<int32_t> &traversal_order,
const std::vector<DimensionType> &format,
const std::vector<int32_t> &block_size, const std::vector<int32_t> &block_map)
- : _tensor_name{tensor_name}, _traversal_order{traversal_order}, _format{format},
- _block_size{block_size}, _block_map{block_map}
+ : _tensor_name{tensor_name}, _traversal_order{traversal_order}, _format{format},
+ _block_size{block_size}, _block_map{block_map}
{
// DO NOTHING
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SUBSTITUTE_SQUEEZE_TO_RESHAPE_PASS_H__
+#define __LUCI_SUBSTITUTE_SQUEEZE_TO_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to substitute a Squeeze node with a Reshape node under certain conditions.
+ */
+struct SubstituteSqueezeToReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::SubstituteSqueezeToReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SUBSTITUTE_SQUEEZE_TO_RESHAPE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SUBSTITUTE_TRANSPOSE_TO_RESHAPE_PASS_H__
+#define __LUCI_SUBSTITUTE_TRANSPOSE_TO_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to substitute a Transpose node with a single Reshape node when its input shape allows it.
+ */
+struct SubstituteTransposeToReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::SubstituteTransposeToReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SUBSTITUTE_TRANSPOSE_TO_RESHAPE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_TRANSFORM_MIN_MAX_TO_RELU6_PASS_H__
+#define __LUCI_TRANSFORM_MIN_MAX_TO_RELU6_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to transform Maximum(Minimum(input, 6), 0) to Relu6
+ */
+struct TransformMinMaxToRelu6Pass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::TransformMinMaxToRelu6Pass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_TRANSFORM_MIN_MAX_TO_RELU6_PASS_H__
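A hedged sketch of the structural half of the Maximum(Minimum(input, 6), 0) pattern named in the @brief, in the same dynamic_cast matching style as the BatchNormPatternFinder further down this diff; CircleMaximum/CircleMinimum with x()/y() accessors are assumed, operand-order variations are ignored, and the checks that the constants actually hold 6 and 0 are deliberately left out.

bool looks_like_relu6(luci::CircleMaximum *max_node)
{
  // Maximum(Minimum(input, 6), 0): a Minimum on one input and a constant (0) on the other
  auto min_node = dynamic_cast<luci::CircleMinimum *>(max_node->x());
  auto zero = dynamic_cast<luci::CircleConst *>(max_node->y());
  if (min_node == nullptr || zero == nullptr)
    return false;
  // Minimum(input, 6): one input is a constant (6)
  auto six = dynamic_cast<luci::CircleConst *>(min_node->y());
  return six != nullptr; // value checks for 0 and 6 omitted in this sketch
}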
+++ /dev/null
-
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_TYPE_INFERENCE_PASS_H__
-#define __LUCI_TYPE_INFERENCE_PASS_H__
-
-#include <loco.h>
-
-#include <luci/ModulePass.h>
-
-namespace luci
-{
-
-/**
- * @brief Pass to infer type of nodes
- */
-class TypeInferencePass : public luci::Pass
-{
-public:
- virtual const char *name(void) const { return "luci::TypeInferencePass"; }
-
-public:
- bool run(luci::Module *m);
- bool run(loco::Graph *graph);
-};
-
-} // namespace luci
-
-#endif //__LUCI_TYPE_INFERENCE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchNormPatternFinder.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+
+bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::CircleConst *&beta)
+{
+ auto x = loco::must_cast<luci::CircleNode *>(add->x());
+ auto y = loco::must_cast<luci::CircleNode *>(add->y());
+
+ luci::CircleMul *pred = nullptr;
+ luci::CircleConst *constant = nullptr;
+
+ if (x->opcode() == luci::CircleOpcode::CIRCLECONST && y->opcode() == luci::CircleOpcode::MUL)
+ {
+ pred = loco::must_cast<luci::CircleMul *>(y);
+ constant = loco::must_cast<luci::CircleConst *>(x);
+ }
+ else if (x->opcode() == luci::CircleOpcode::MUL && y->opcode() == luci::CircleOpcode::CIRCLECONST)
+ {
+ pred = loco::must_cast<luci::CircleMul *>(x);
+ constant = loco::must_cast<luci::CircleConst *>(y);
+ }
+ else
+ {
+ return false;
+ }
+
+ if (constant->rank() != 1)
+ return false;
+
+ auto channel_dim = constant->dim(0);
+ // Assumption: Layout is channel-last
+ if (!(channel_dim == add->dim(add->rank() - 1)))
+ return false;
+
+ mul = pred;
+ beta = constant;
+ return true;
+}
+
+bool is_batchnorm_add(const luci::CircleAdd *add)
+{
+ // for dummy mul and beta
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ return is_batchnorm_add(add, mul, beta);
+}
+
+bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node,
+ luci::CircleConst *&gamma)
+{
+ auto x = dynamic_cast<luci::CircleConst *>(mul->x());
+ auto y = dynamic_cast<luci::CircleConst *>(mul->y());
+
+ luci::CircleNode *pred = nullptr;
+ luci::CircleConst *constant = nullptr;
+
+ if (x != nullptr && y == nullptr)
+ {
+ pred = loco::must_cast<luci::CircleNode *>(mul->y());
+ constant = x;
+ }
+ else if (x == nullptr && y != nullptr)
+ {
+ pred = loco::must_cast<luci::CircleNode *>(mul->x());
+ constant = y;
+ }
+ else
+ {
+ return false;
+ }
+
+ if (constant->rank() != 1)
+ return false;
+
+ auto channel_dim = constant->dim(0);
+ // Assumption: Layout is channel-last
+ if (!(channel_dim == mul->dim(mul->rank() - 1)))
+ return false;
+
+ pred_node = pred;
+ gamma = constant;
+ return true;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_BATCH_NORM_PATTERN_FINDER_H__
+#define __LUCI_PASS_BATCH_NORM_PATTERN_FINDER_H__
+
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+
+/**
+ * @brief Find Mul-Add pattern and return Mul and beta as BatchNorm
+ */
+bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::CircleConst *&beta);
+
+/**
+ * @brief Find Mul-Add pattern
+ */
+bool is_batchnorm_add(const luci::CircleAdd *add);
+
+/**
+ * @brief Find Const-Mul pattern and return Node and gamma as BatchNorm
+ */
+bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node,
+ luci::CircleConst *&gamma);
+
+} // namespace luci
+
+#endif // __LUCI_PASS_BATCH_NORM_PATTERN_FINDER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchNormPatternFinder.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace luci
+{
+namespace test
+{
+
+/**
+ * @brief Graphlet with Add and Const as beta from BatchNorm
+ */
+class AddBetaGraphlet
+{
+public:
+ AddBetaGraphlet() = default;
+
+ void init(loco::Graph *g, const ShapeU32 shape, luci::FusedActFunc actf)
+ {
+ _add = g->nodes()->create<luci::CircleAdd>();
+ _add_beta = g->nodes()->create<luci::CircleConst>();
+
+ _add->dtype(loco::DataType::FLOAT32);
+ _add_beta->dtype(loco::DataType::FLOAT32);
+
+ _add->fusedActivationFunction(actf);
+
+ assert(shape.size() > 0);
+ auto last_it = std::prev(shape.end(), 1);
+ auto channel_size = *last_it;
+
+ _add->shape(shape);
+ _add_beta->shape({channel_size});
+ _add_beta->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ _add_beta->at<loco::DataType::FLOAT32>(i) = i;
+
+ _add->name("add");
+ _add_beta->name("add_beta");
+ }
+
+public:
+ luci::CircleAdd *add() { return _add; }
+
+protected:
+ luci::CircleAdd *_add = nullptr;
+ luci::CircleConst *_add_beta = nullptr;
+};
+
+/**
+ * @brief Graphlet with Mul and Const as gamma from BatchNorm
+ */
+class MulGammaGraphlet
+{
+public:
+ MulGammaGraphlet() = default;
+
+ void init(loco::Graph *g, const ShapeU32 shape, luci::FusedActFunc actf)
+ {
+ _mul = g->nodes()->create<luci::CircleMul>();
+ _mul_gamma = g->nodes()->create<luci::CircleConst>();
+
+ _mul->dtype(loco::DataType::FLOAT32);
+ _mul_gamma->dtype(loco::DataType::FLOAT32);
+
+ _mul->fusedActivationFunction(actf);
+
+ assert(shape.size() > 0);
+ auto last_it = std::prev(shape.end(), 1);
+ auto channel_size = *last_it;
+
+ _mul->shape(shape);
+ _mul_gamma->shape({channel_size});
+ _mul_gamma->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ _mul_gamma->at<loco::DataType::FLOAT32>(i) = i;
+
+ _mul->name("mul");
+ _mul_gamma->name("mul_gamma");
+ }
+
+public:
+ luci::CircleMul *mul(void) { return _mul; }
+
+protected:
+ luci::CircleMul *_mul = nullptr;
+ luci::CircleConst *_mul_gamma = nullptr;
+};
+
+/**
+ * @brief Graph of Mul-Add pattern from BatchNorm
+ */
+class MulAddGraph : public TestIOGraph, public AddBetaGraphlet, public MulGammaGraphlet
+{
+public:
+ MulAddGraph() = default;
+
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ MulGammaGraphlet::init(g(), shape_in, luci::FusedActFunc::NONE);
+ AddBetaGraphlet::init(g(), shape_out, luci::FusedActFunc::RELU);
+
+ // connect network
+ _mul->x(input());
+ _mul->y(_mul_gamma);
+ _add->x(_mul);
+ _add->y(_add_beta);
+ output()->from(_add);
+ }
+};
+
+/**
+ * @brief Graph of Add with Const
+ */
+class AddGraph : public TestIOGraph, public AddBetaGraphlet
+{
+public:
+ AddGraph() = default;
+
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ AddBetaGraphlet::init(g(), shape_in, luci::FusedActFunc::RELU);
+
+ // connect network
+ _add->x(input());
+ _add->y(_add_beta);
+ output()->from(_add);
+ }
+};
+
+} // namespace test
+} // namespace luci
+
+class BatchNormPatternFinderMulAddTest : public ::testing::Test
+{
+public:
+ BatchNormPatternFinderMulAddTest() = default;
+
+protected:
+ luci::test::MulAddGraph _mag;
+};
+
+class BatchNormPatternFinderAddTest : public ::testing::Test
+{
+public:
+ BatchNormPatternFinderAddTest() = default;
+
+protected:
+ luci::test::AddGraph _ag;
+};
+
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add)
+{
+ _mag.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ auto res = luci::is_batchnorm_add(_mag.add(), mul, beta);
+ ASSERT_TRUE(res);
+ ASSERT_NE(nullptr, mul);
+ ASSERT_NE(nullptr, beta);
+}
+
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add2)
+{
+ _mag.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ auto res = luci::is_batchnorm_add(_mag.add());
+ ASSERT_TRUE(res);
+}
+
+TEST_F(BatchNormPatternFinderAddTest, is_batchnorm_add_NEG)
+{
+ _ag.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ auto res = luci::is_batchnorm_add(_ag.add(), mul, beta);
+ ASSERT_FALSE(res);
+}
+
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_mul)
+{
+ _mag.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ luci::CircleNode *pred = nullptr;
+ luci::CircleConst *gamma = nullptr;
+
+ auto res = luci::is_batchnorm_mul(_mag.mul(), pred, gamma);
+ ASSERT_TRUE(res);
+ ASSERT_NE(nullptr, pred);
+ ASSERT_NE(nullptr, gamma);
+}
#include "luci/CircleOptimizer.h"
+#include "luci/Pass/ConvertNCHWToNHWCPass.h"
+#include "luci/Pass/FoldAddV2Pass.h"
+#include "luci/Pass/FoldCastPass.h"
#include "luci/Pass/FoldDequantizePass.h"
+#include "luci/Pass/FoldSparseToDensePass.h"
+#include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
#include "luci/Pass/FuseActivationFunctionPass.h"
#include "luci/Pass/FuseAddWithTConvPass.h"
-#include "luci/Pass/FuseBatchNormWithTConv.h"
+#include "luci/Pass/FuseBatchNormWithConvPass.h"
+#include "luci/Pass/FuseBatchNormWithDwConvPass.h"
+#include "luci/Pass/FuseBatchNormWithTConvPass.h"
#include "luci/Pass/FuseBCQPass.h"
#include "luci/Pass/FuseInstanceNormPass.h"
#include "luci/Pass/FusePreActivationBatchNormPass.h"
#include "luci/Pass/MakeBatchNormGammaPositivePass.h"
#include "luci/Pass/PropagateQuantParamPass.h"
+#include "luci/Pass/RemoveRedundantReshapePass.h"
#include "luci/Pass/RemoveRedundantTransposePass.h"
+#include "luci/Pass/RemoveUnnecessaryReshapePass.h"
+#include "luci/Pass/RemoveUnnecessarySlicePass.h"
+#include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h"
+#include "luci/Pass/RemoveUnnecessarySplitPass.h"
#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h"
#include "luci/Pass/ResolveCustomOpAddPass.h"
#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
#include "luci/Pass/SparsifyTensorPass.h"
#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h"
#include "luci/Pass/SubstitutePackToReshapePass.h"
+#include "luci/Pass/SubstituteSqueezeToReshapePass.h"
+#include "luci/Pass/SubstituteTransposeToReshapePass.h"
+#include "luci/Pass/TransformMinMaxToRelu6Pass.h"
// TODO add more passes
-#include "luci/Pass/ShapeInferencePass.h"
-#include "luci/Pass/ShapeSignatureInferencePass.h"
-#include "luci/Pass/TypeInferencePass.h"
-
-// Following passes will be removed after refactoring is finished
-#include "luci/Pass/MigrateLegacyShapeDtypePass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+#include "luci/Pass/CircleTypeInferencePass.h"
// logo passes
#include <logo/RemoveDeadNodeWithQueryPass.h>
#include "ModulePhase.h"
#include "ProgressReporter.h"
-#include "CircleOptimizerUtils.h"
+#include "helpers/Strings.h"
+
+#include "QuantizedModelVerifier.h"
#include <luci/IR/CircleNodes.h>
#include <logo/Phase.h>
namespace
{
-std::vector<int> parseIntFromCommadelimitedStr(std::string str)
-{
- std::vector<int> ret;
- std::istringstream is(str);
- for (uint32_t i; is >> i;)
- {
- assert(i != ',');
- ret.push_back(i);
- if (is.peek() == ',')
- is.ignore();
- }
- return ret;
-}
-
using namespace luci;
class OptimizeOptionsImpl final : public luci::CircleOptimizer::Options
{
luci::Phase phase;
- // Following passes will be deprecated after refactoring is finished.
- phase.emplace_back(std::make_unique<luci::MigrateLegacyShapeDtypePass>());
-
  // Following passes are needed every time other passes create new nodes or modify existing ones.
- phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
- phase.emplace_back(std::make_unique<luci::ShapeSignatureInferencePass>());
- phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
if (_options->query(Options::Algorithm::FuseBCQ))
{
/* TRANSFORM DECLARATION BEGIN */
phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
- // Following passes will be deprecated after refactoring is finished.
- phase.emplace_back(std::make_unique<luci::MigrateLegacyShapeDtypePass>());
-
  // Following passes are needed every time other passes create new nodes or modify existing ones.
- phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
- phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
- phase.emplace_back(std::make_unique<luci::ShapeSignatureInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
if (_options->query(Options::Algorithm::ResolveCustomOpAdd))
{
{
phase.emplace_back(std::make_unique<FuseInstanceNormPass>());
}
+ if (_options->query(Options::Algorithm::FuseBatchNormWithConv))
+ {
+ phase.emplace_back(std::make_unique<FuseBatchNormWithConvPass>());
+ }
+ if (_options->query(Options::Algorithm::FuseBatchNormWithDwConv))
+ {
+ phase.emplace_back(std::make_unique<FuseBatchNormWithDwConvPass>());
+ }
if (_options->query(Options::Algorithm::FuseBatchNormWithTConv))
{
phase.emplace_back(std::make_unique<FuseBatchNormWithTConvPass>());
{
phase.emplace_back(std::make_unique<FuseActivationFunctionPass>());
}
+ if (_options->query(Options::Algorithm::FoldAddV2))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldAddV2Pass>());
+ }
+ if (_options->query(Options::Algorithm::FoldCast))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldCastPass>());
+ }
if (_options->query(Options::Algorithm::FoldDequantize))
{
phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
}
+ if (_options->query(Options::Algorithm::FoldSparseToDense))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldSparseToDensePass>());
+ }
+ if (_options->query(Options::Algorithm::ForwardReshapeToUnaryOp))
+ {
+ phase.emplace_back(std::make_unique<luci::ForwardReshapeToUnaryOpPass>());
+ }
if (_options->query(Options::Algorithm::FusePreActivationBatchNorm))
{
phase.emplace_back(std::make_unique<luci::FusePreActivationBatchNormPass>());
{
phase.emplace_back(std::make_unique<luci::ShuffleWeightTo16x1Float32Pass>());
}
+ if (_options->query(Options::Algorithm::RemoveUnnecessaryReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessaryReshapePass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveUnnecessarySlice))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessarySlicePass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveUnnecessaryStridedSlice))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessaryStridedSlicePass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveUnnecessarySplit))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessarySplitPass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveRedundantReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantReshapePass>());
+ }
if (_options->query(Options::Algorithm::RemoveRedundantTranspose))
{
phase.emplace_back(std::make_unique<luci::RemoveRedundantTransposePass>());
{
phase.emplace_back(std::make_unique<luci::SubstitutePackToReshapePass>());
}
+ if (_options->query(Options::Algorithm::SubstituteSqueezeToReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::SubstituteSqueezeToReshapePass>());
+ }
+ if (_options->query(Options::Algorithm::SubstituteTransposeToReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::SubstituteTransposeToReshapePass>());
+ }
+ if (_options->query(Options::Algorithm::TransformMinMaxToRelu6Pass))
+ {
+ phase.emplace_back(std::make_unique<luci::TransformMinMaxToRelu6Pass>());
+ }
+ if (_options->query(Options::Algorithm::ConvertNCHWToNHWC))
+ {
+ bool preserve_input =
+ _options->param(Options::AlgorithmParameters::NCHW_to_NHWC_preserve_input_shape) == "true";
+ bool preserve_output =
+ _options->param(Options::AlgorithmParameters::NCHW_to_NHWC_preserve_output_shape) == "true";
+
+ phase.emplace_back(
+ std::make_unique<luci::ConvertNCHWToNHWCPass>(preserve_input, preserve_output));
+ }
/* TRANSFORM DECLARATION END */
}
luci::QuantizeDequantizeWeightsPass fake_quantizer(
- str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity));
+ str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity));
fake_quantizer.run(g);
}
phase.emplace_back(std::make_unique<luci::PropagateQuantParamPass>());
- phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
- phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
phase_runner.attach(&prog);
phase_runner.run(phase);
+
+ // Verify the type/granularity of the quantized model
+ luci::QuantizedModelVerifier verifier(str_to_dtype(output_dtype),
+ str_to_granularity(granularity));
+ verifier.verify(g);
}
// Requantize
logo::Phase phase;
// Do Shape/Type inference
- phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
- phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
{
std::string tensor_name = _options->param(Options::AlgorithmParameters::Sparsify_tensor_name);
std::string str_tarversal_order =
- _options->param(Options::AlgorithmParameters::Sparsify_traversal_order);
+ _options->param(Options::AlgorithmParameters::Sparsify_traversal_order);
std::string str_format = _options->param(Options::AlgorithmParameters::Sparsify_format);
std::string str_block_size = _options->param(Options::AlgorithmParameters::Sparsify_block_size);
std::string str_block_map = _options->param(Options::AlgorithmParameters::Sparsify_block_map);
// traversal order
- std::vector<int32_t> traversal_order = parseIntFromCommadelimitedStr(str_tarversal_order);
+ std::vector<int32_t> traversal_order = csv_to_vector<int32_t>(str_tarversal_order);
// format
std::vector<DimensionType> format;
std::istringstream is(str_format);
is.ignore();
}
// block size
- std::vector<int32_t> block_size = parseIntFromCommadelimitedStr(str_block_size);
+ std::vector<int32_t> block_size = csv_to_vector<int32_t>(str_block_size);
// block map
- std::vector<int32_t> block_map = parseIntFromCommadelimitedStr(str_block_map);
+ std::vector<int32_t> block_map = csv_to_vector<int32_t>(str_block_map);
luci::SparsifyTensorPass sparsifier{tensor_name, traversal_order, format, block_size,
block_map};
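For reference on csv_to_vector<int32_t>, which replaces parseIntFromCommadelimitedStr above: the diff only shows the call sites, so the following is merely the removed helper rewritten as a template under the assumption that helpers/Strings.h provides something equivalent; the real implementation may differ.

#include <sstream>
#include <string>
#include <vector>

template <typename T> std::vector<T> csv_to_vector(const std::string &str)
{
  std::vector<T> ret;
  std::istringstream is(str);
  for (T i; is >> i;)
  {
    ret.push_back(i);
    if (is.peek() == ',') // skip the separator between values
      is.ignore();
  }
  return ret;
}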
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/CircleOptimizer.h"
+
+#include <gtest/gtest.h>
+
+using namespace luci;
+using Algorithms = luci::CircleOptimizer::Options::Algorithm;
+using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
+
+TEST(CircleOptimizerTest, optimize_algorithms)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+  // NOTE these options are enabled just to exercise (cover) the corresponding optimizer code paths
+ // TODO add more if needed
+ options->enable(Algorithms::FoldAddV2);
+ options->enable(Algorithms::FoldCast);
+ options->enable(Algorithms::FoldDequantize);
+ options->enable(Algorithms::FoldSparseToDense);
+ options->enable(Algorithms::FusePreActivationBatchNorm);
+ options->enable(Algorithms::MakeBatchNormGammaPositive);
+ options->enable(Algorithms::ShuffleWeightTo16x1Float32);
+ options->enable(Algorithms::RemoveUnnecessaryReshape);
+ options->enable(Algorithms::RemoveUnnecessarySlice);
+ options->enable(Algorithms::RemoveUnnecessarySplit);
+ options->enable(Algorithms::ReplaceMulAddWithDepthwiseConv);
+ options->enable(Algorithms::SubstituteTransposeToReshape);
+ options->enable(Algorithms::ConvertNCHWToNHWC);
+
+ o.optimize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleOptimizerTest, sparsify_simple)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::SparsifyTensorPass);
+ options->param(AlgorithmParameters::Sparsify_tensor_name, "dummy");
+ options->param(AlgorithmParameters::Sparsify_traversal_order, "dummy");
+ options->param(AlgorithmParameters::Sparsify_format, "ds");
+ options->param(AlgorithmParameters::Sparsify_block_size, "1,1");
+ options->param(AlgorithmParameters::Sparsify_block_map, "1,1");
+
+ o.sparsify(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleOptimizerTest, quantize_quantdequant_simple)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ o.quantize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleOptimizerTest, quantize_quantdequant_input_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleOptimizerTest, quantize_quantdequant_output_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleOptimizerTest, quantize_quantdequant_gran_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "invalid");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleOptimizerTest, quantize_minmax_simple)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ o.quantize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleOptimizerTest, quantize_minmax_input_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleOptimizerTest, quantize_minmax_output_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleOptimizerTest, quantize_minmax_gran_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "invalid");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleOptimizerTest, quantize_requant_simple)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::Requantize);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "int8");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+
+ o.quantize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleOptimizerTest, quantize_requant_input_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::Requantize);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleOptimizerTest, quantize_requant_output_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::Requantize);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "int8");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "invalid");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
#include "CircleOptimizerUtils.h"
-namespace luci
-{
-
-bool in_array(const std::string &str, const std::vector<std::string> &array)
-{
- return std::find(array.begin(), array.end(), str) != array.end();
-}
+#include <luci/IR/CircleNode.h>
-std::string to_string(const std::vector<std::string> &strings)
-{
- assert(!strings.empty());
-
- std::string res;
- for (unsigned int i = 0; i < strings.size() - 1; i++)
- res += strings[i] + ", ";
-
- res += strings[strings.size() - 1];
- return res;
-}
-
-std::string to_lower_case(std::string s)
-{
- std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
- return s;
-}
-
-loco::DataType str_to_dtype(const std::string &str)
+namespace luci
{
- if (to_lower_case(str).compare("uint8") == 0)
- return loco::DataType::U8;
- if (to_lower_case(str).compare("uint16") == 0)
- return loco::DataType::U16;
- if (to_lower_case(str).compare("uint32") == 0)
- return loco::DataType::U32;
- if (to_lower_case(str).compare("uint64") == 0)
- return loco::DataType::U64;
-
- if (to_lower_case(str).compare("int8") == 0)
- return loco::DataType::S8;
- if (to_lower_case(str).compare("int16") == 0)
- return loco::DataType::S16;
- if (to_lower_case(str).compare("int32") == 0)
- return loco::DataType::S32;
- if (to_lower_case(str).compare("int64") == 0)
- return loco::DataType::S64;
-
- if (to_lower_case(str).compare("float16") == 0)
- return loco::DataType::FLOAT16;
- if (to_lower_case(str).compare("float32") == 0)
- return loco::DataType::FLOAT32;
- if (to_lower_case(str).compare("float64") == 0)
- return loco::DataType::FLOAT64;
- if (to_lower_case(str).compare("bool") == 0)
- return loco::DataType::BOOL;
-
- return loco::DataType::Unknown;
-}
-
-QuantizationGranularity str_to_granularity(const std::string &str)
+bool has_dynamic_shape(const loco::Node *node)
{
- if (to_lower_case(str).compare("layer") == 0)
- return QuantizationGranularity::LayerWise;
-
- if (to_lower_case(str).compare("channel") == 0)
- return QuantizationGranularity::ChannelWise;
-
- throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'");
+ const auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ for (uint32_t i = 0; i < circle_node->rank(); ++i)
+ if (!circle_node->dim(i).known())
+ return true;
+ return false;
}
} // namespace luci
#ifndef __LUCI_CIRCLE_OPTIMIZER_UTILS_H__
#define __LUCI_CIRCLE_OPTIMIZER_UTILS_H__
-#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
-#include "luci/Pass/QuantizeWithMinMaxPass.h"
-
#include <loco.h>
-#include <algorithm>
-
namespace luci
{
-bool in_array(const std::string &, const std::vector<std::string> &);
-
-std::string to_string(const std::vector<std::string> &);
-
-std::string to_lower_case(std::string);
-
-loco::DataType str_to_dtype(const std::string &);
-
-QuantizationGranularity str_to_granularity(const std::string &);
+bool has_dynamic_shape(const loco::Node *node);
} // namespace luci
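A hypothetical call site for the has_dynamic_shape helper declared above, only to show its intended use; safe_to_rewrite and its surrounding context are invented for illustration and assume the declarations in this header.

// Illustrative only: a pass might skip nodes whose shape is not fully known yet.
bool safe_to_rewrite(const luci::CircleNode *node)
{
  if (luci::has_dynamic_shape(node))
    return false; // some dimension is still unknown; defer until shape inference resolves it
  return true;
}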
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "helpers/InferenceCandidates.h"
+
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco.h>
+
+namespace
+{
+
+bool is_same_shape(luci::CircleNode *node, loco::TensorShape shape)
+{
+ if (node->shape_status() != luci::ShapeStatus::VALID)
+ return false;
+
+ if (node->rank() != shape.rank())
+ return false;
+
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ {
+ if (node->dim(i).known() != shape.dim(i).known())
+ return false;
+
+ if (node->dim(i).value() != shape.dim(i).value())
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool CircleShapeInferencePass::run(luci::Module *m)
+{
+ bool changed = false;
+
+ for (size_t g = 0; g < m->size(); ++g)
+ {
+ if (run(m->graph(g)))
+ changed = true;
+ }
+
+ return changed;
+}
+
+bool CircleShapeInferencePass::run(loco::Graph *g)
+{
+ luci::sinf::Rule shape_infer_rule;
+ bool changed = false;
+
+ for (auto node : inference_candidates(g))
+ {
+ loco::TensorShape shape;
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+
+ if (shape_infer_rule.infer(circle_node, shape) && !is_same_shape(circle_node, shape))
+ {
+ circle_node->rank(shape.rank());
+ for (uint32_t i = 0; i < shape.rank(); ++i)
+ circle_node->dim(i) = shape.dim(i);
+
+ circle_node->shape_status(luci::ShapeStatus::VALID);
+
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <loco.h>
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+TEST(CircleShapeInferencePassTest, name)
+{
+ luci::CircleShapeInferencePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+/**
+ * This test is to check whether shape inference is done by topological order.
+ *
+ * When perm() of "transpose1" is changed from "old_perm" to "new_perm"
+ * by some of luci/Pass like below diagram, shape_status of "transpose1" is
+ * still VALID even the shape should be changed.
+ * If "transpose2" is visited first before shape of "transpose1" is updated,
+ * "transpose2" can reference the shape of "relu" which is not updated yet.
+ * Then shape of "transpose2" becomes 3x5x5x1 and it causes an error at "conv2d".
+ *
+ * <Initial graph>
+ * 4x1x1x3
+ * [old_perm] ----------+ [filter] ----------+
+ * (0,2,1,3) | |
+ * | [bias] ----------+
+ * | |
+ * input ------> [transpose1] ------> [relu] ------> [conv2d] ------> output
+ * 1x5x5x3 1x5x5x3 1x5x5x3 1x5x5x4
+ *
+ *
+ * <Right after transformation>
+ * 4x1x1x3
+ * [new_perm] ----------+-----------------------------------+ [filter] ------+
+ * (3,2,1,0) | | |
+ * | | [bias] ------+
+ * | | |
+ * input ------> [transpose1] ------> [relu] ------> [transpose2] ------> [conv2d] ------> output
+ * 1x5x5x3 1x5x5x3 1x5x5x3 ? 1x5x5x4
+ *
+ *
+ * <Expected result>
+ * 4x1x1x3
+ * [new_perm] ----------+-----------------------------------+ [filter] ------+
+ * (3,2,1,0) | | |
+ * | | [bias] ------+
+ * | | |
+ * input ------> [transpose1] ------> [relu] ------> [transpose2] ------> [conv2d] ------> output
+ * 1x5x5x3 3x5x5x1 3x5x5x1 1x5x5x3 1x5x5x4
+ *
+ */
+TEST(CircleShapeInferencePassTest, original_node_change)
+{
+ luci::CircleShapeInferencePass pass;
+ auto g = loco::make_graph();
+
+ // Have to be packed into lambda to check throw
+ auto shape_inference_run = [&]() {
+ while (pass.run(g.get()) == true)
+ ;
+ };
+
+ // Create nodes to make relu traversed first
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto relu = g->nodes()->create<luci::CircleRelu>();
+ auto old_perm = g->nodes()->create<luci::CircleConst>();
+ auto transpose1 = g->nodes()->create<luci::CircleTranspose>();
+ auto filter = g->nodes()->create<luci::CircleConst>();
+ auto bias = g->nodes()->create<luci::CircleConst>();
+ auto conv2d = g->nodes()->create<luci::CircleConv2D>();
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ auto new_perm = g->nodes()->create<luci::CircleConst>();
+ auto transpose2 = g->nodes()->create<luci::CircleTranspose>();
+
+ // Build up initial graph
+ auto graph_input = g->inputs()->create();
+ graph_input->shape({1, 5, 5, 3});
+
+ input->index(graph_input->index());
+ input->shape({1, 5, 5, 3});
+ input->shape_status(luci::ShapeStatus::VALID);
+
+ old_perm->dtype(loco::DataType::S32);
+ old_perm->size<loco::DataType::S32>(4);
+ old_perm->shape({4});
+ old_perm->at<loco::DataType::S32>(0) = 0;
+ old_perm->at<loco::DataType::S32>(1) = 2;
+ old_perm->at<loco::DataType::S32>(2) = 1;
+ old_perm->at<loco::DataType::S32>(3) = 3;
+ old_perm->shape_status(luci::ShapeStatus::VALID);
+
+ transpose1->a(input);
+ transpose1->perm(old_perm);
+
+ relu->features(transpose1);
+
+ filter->dtype(loco::DataType::FLOAT32);
+ filter->size<loco::DataType::FLOAT32>(4 * 1 * 1 * 3);
+ filter->shape({4, 1, 1, 3});
+ filter->shape_status(luci::ShapeStatus::VALID);
+
+ bias->dtype(loco::DataType::FLOAT32);
+ bias->size<loco::DataType::FLOAT32>(4);
+ bias->shape({4});
+ bias->shape_status(luci::ShapeStatus::VALID);
+
+ conv2d->input(relu);
+ conv2d->filter(filter);
+ conv2d->bias(bias);
+ conv2d->padding(luci::Padding::VALID);
+ conv2d->stride()->h(1);
+ conv2d->stride()->w(1);
+ conv2d->dilation()->h(1);
+ conv2d->dilation()->w(1);
+
+ output->from(conv2d);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+ graph_output->shape({1, 5, 5, 4});
+
+ ASSERT_NO_THROW(shape_inference_run());
+
+ // Transform graph
+ new_perm->dtype(loco::DataType::S32);
+ new_perm->size<loco::DataType::S32>(4);
+ new_perm->shape({4});
+ new_perm->at<loco::DataType::S32>(0) = 3;
+ new_perm->at<loco::DataType::S32>(1) = 2;
+ new_perm->at<loco::DataType::S32>(2) = 1;
+ new_perm->at<loco::DataType::S32>(3) = 0;
+ new_perm->shape_status(luci::ShapeStatus::VALID);
+
+ transpose1->perm(new_perm);
+
+ transpose2->a(relu);
+ transpose2->perm(new_perm);
+
+ conv2d->input(transpose2);
+
+ ASSERT_NO_THROW(shape_inference_run());
+
+ // Check result of shape inference is correct
+ ASSERT_EQ(3, transpose1->dim(0).value());
+ ASSERT_EQ(5, transpose1->dim(1).value());
+ ASSERT_EQ(5, transpose1->dim(2).value());
+ ASSERT_EQ(1, transpose1->dim(3).value());
+
+ ASSERT_EQ(3, relu->dim(0).value());
+ ASSERT_EQ(5, relu->dim(1).value());
+ ASSERT_EQ(5, relu->dim(2).value());
+ ASSERT_EQ(1, relu->dim(3).value());
+
+ ASSERT_EQ(1, transpose2->dim(0).value());
+ ASSERT_EQ(5, transpose2->dim(1).value());
+ ASSERT_EQ(5, transpose2->dim(2).value());
+ ASSERT_EQ(3, transpose2->dim(3).value());
+
+ ASSERT_EQ(1, conv2d->dim(0).value());
+ ASSERT_EQ(5, conv2d->dim(1).value());
+ ASSERT_EQ(5, conv2d->dim(2).value());
+ ASSERT_EQ(4, conv2d->dim(3).value());
+
+ SUCCEED();
+}
+
+/**
+ * This test checks the case where an imported shape is wrong.
+ *
+ * Even though "concat1" has a wrong shape at first, the correct shape should be inferred.
+ *
+ * <Initial graph>
+ *
+ * 1x1x1x1
+ * input1 ------+ 8x7x6x5
+ * +-----> [concat1] ------+
+ * input2 ------+ (axis=3) | 1x1x2x3
+ * 1x1x1x2 +------> [concat2] ------> output
+ * | (axis=2)
+ * 1x1x1x3 |
+ * input3 ------------------------------+
+ *
+ *
+ * <Expected result>
+ *
+ * 1x1x1x1
+ * input1 ------+ 1x1x1x3
+ * +-----> [concat1] ------+
+ * input2 ------+ (axis=3) | 1x1x2x3
+ * 1x1x1x2 +------> [concat2] ------> output
+ * | (axis=2)
+ * 1x1x1x3 |
+ * input3 ------------------------------+
+ */
+TEST(CircleShapeInferencePassTest, wrong_imported_shape)
+{
+ luci::CircleShapeInferencePass pass;
+ auto g = loco::make_graph();
+
+  // Wrapped in a lambda so we can check that shape inference does not throw
+ auto shape_inference_run = [&]() {
+ while (pass.run(g.get()) == true)
+ ;
+ };
+
+  // Create nodes in this order so that concat2 is traversed first
+ auto concat2 = g->nodes()->create<luci::CircleConcatenation>(2);
+ auto concat1 = g->nodes()->create<luci::CircleConcatenation>(2);
+ auto input1 = g->nodes()->create<luci::CircleInput>();
+ auto input2 = g->nodes()->create<luci::CircleInput>();
+ auto input3 = g->nodes()->create<luci::CircleInput>();
+
+ // Build up initial graph
+ auto graph_input1 = g->inputs()->create();
+ auto graph_input2 = g->inputs()->create();
+ auto graph_input3 = g->inputs()->create();
+ graph_input1->shape({1, 1, 1, 1});
+ graph_input2->shape({1, 1, 1, 2});
+  graph_input3->shape({1, 1, 1, 3});
+
+ input1->index(graph_input1->index());
+ input1->shape({1, 1, 1, 1});
+ input1->shape_status(luci::ShapeStatus::VALID);
+
+ input2->index(graph_input2->index());
+ input2->shape({1, 1, 1, 2});
+ input2->shape_status(luci::ShapeStatus::VALID);
+
+ input3->index(graph_input3->index());
+ input3->shape({1, 1, 1, 3});
+ input3->shape_status(luci::ShapeStatus::VALID);
+
+ concat1->values(0, input1);
+ concat1->values(1, input2);
+ concat1->axis(3);
+ concat1->shape({8, 7, 6, 5}); // Intentionally set wrong shape
+ concat1->shape_status(luci::ShapeStatus::VALID);
+
+ concat2->values(0, concat1);
+ concat2->values(1, input3);
+ concat2->axis(2);
+
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ output->from(concat2);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+ graph_output->shape({1, 1, 2, 3});
+
+ ASSERT_NO_THROW(shape_inference_run());
+
+ // Check result of shape inference is correct
+ ASSERT_EQ(1, concat1->dim(0).value());
+ ASSERT_EQ(1, concat1->dim(1).value());
+ ASSERT_EQ(1, concat1->dim(2).value());
+ ASSERT_EQ(3, concat1->dim(3).value());
+
+ ASSERT_EQ(1, concat2->dim(0).value());
+ ASSERT_EQ(1, concat2->dim(1).value());
+ ASSERT_EQ(2, concat2->dim(2).value());
+ ASSERT_EQ(3, concat2->dim(3).value());
+
+ SUCCEED();
+}
+
+/**
+ * This test checks that the shapes of virtual operations are inferred even when they
+ * are not used for the graph output, since those shapes still need to be exported.
+ *
+ * Although "split_out2" is not used for the graph output, its shape should be inferred.
+ *
+ * <Initial graph>
+ *
+ *
+ * 1x6 +----> [split_out1] ----> output
+ * input ------> [split] -----+
+ * (split_dim=1) +----> [split_out2]
+ * (num_split=2)
+ *
+ *
+ * <Expected result>
+ * 1x3 1x3
+ * 1x6 +----> [split_out1] ----> output
+ * input ------> [split] -----+
+ * (split_dim=1) +----> [split_out2]
+ * (num_split=2) 1x3
+ */
+TEST(CircleShapeInferencePassTest, not_used_virtual_op)
+{
+ luci::CircleShapeInferencePass pass;
+ auto g = loco::make_graph();
+
+  // Wrapped in a lambda so we can check that shape inference does not throw
+ auto shape_inference_run = [&]() {
+ while (pass.run(g.get()) == true)
+ ;
+ };
+
+ // Create nodes
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto split = g->nodes()->create<luci::CircleSplit>();
+ auto split_out1 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_out2 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+
+ // Build up initial graph
+ auto graph_input1 = g->inputs()->create();
+ graph_input1->shape({1, 6});
+
+ input->index(graph_input1->index());
+ input->shape({1, 6});
+ input->shape_status(luci::ShapeStatus::VALID);
+
+ split_dim->dtype(loco::DataType::S32);
+ split_dim->size<loco::DataType::S32>(1);
+ split_dim->shape({1});
+ split_dim->at<loco::DataType::S32>(0) = 1;
+ split_dim->shape_status(luci::ShapeStatus::VALID);
+
+ split->split_dim(split_dim);
+ split->input(input);
+ split->num_split(2);
+
+ split_out1->input(split);
+ split_out1->index(0);
+
+ split_out2->input(split);
+ split_out2->index(1);
+
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ output->from(split_out1);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+ graph_output->shape({1, 3});
+
+ ASSERT_NO_THROW(shape_inference_run());
+
+ // Check result of shape inference is correct
+ ASSERT_EQ(1, split_out1->dim(0).value());
+ ASSERT_EQ(3, split_out1->dim(1).value());
+
+ ASSERT_EQ(1, split_out2->dim(0).value());
+ ASSERT_EQ(3, split_out2->dim(1).value());
+
+ SUCCEED();
+}
* limitations under the License.
*/
+#include "helpers/InferenceCandidates.h"
+
#include "luci/Pass/CircleTypeInferencePass.h"
#include <luci/Service/CircleTypeInference.h>
luci::tinf::Rule type_infer_rule;
bool changed = false;
- for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+ for (auto node : inference_candidates(g))
{
loco::DataType dtype;
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/CircleTypeInferencePass.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleTypeInferencePassTest, name)
+{
+ luci::CircleTypeInferencePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ConvertNCHWToNHWCPass.h"
+#include "CircleOptimizerUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Log.h>
+
+namespace
+{
+
+enum class DataFormat
+{
+ NCHW,
+ NHWC
+};
+
+/**
+ * @brief Set annotation for DataFormat (NCHW, NHWC)
+ *
+ * @note DataFormatAnnotation will live longer than this Pass (until the
+ * annotated loco::Node is erased). So, do not use large data in the
+ * annotation to avoid excessive memory usage.
+ */
+class DataFormatAnnotation final : public loco::NodeAnnotation
+{
+public:
+ DataFormatAnnotation(const DataFormat &format) : _format{format}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const DataFormat &format(void) const { return _format; }
+
+private:
+ DataFormat _format;
+};
+
+void set_data_format(loco::Node *node, const DataFormat &format)
+{
+ node->annot(std::make_unique<DataFormatAnnotation>(format));
+}
+
+DataFormat get_data_format(loco::Node *node)
+{
+ assert(node->annot<DataFormatAnnotation>() != nullptr);
+ return node->annot<DataFormatAnnotation>()->format();
+}
+
+bool has_data_format(loco::Node *node) { return node->annot<DataFormatAnnotation>() != nullptr; }
+
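+// Creates a Transpose node whose perm is a constant S32 vector built from `indices`;
+// the caller is responsible for connecting the transpose input (trans->a()).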
+luci::CircleTranspose *create_4d_transpose(luci::CircleNode *node,
+ const std::vector<int32_t> indices)
+{
+ assert(indices.size() == 4);
+
+ auto name = node->name();
+ assert(name.length() > 0);
+
+ auto perm = node->graph()->nodes()->create<luci::CircleConst>();
+ perm->dtype(loco::DataType::S32);
+ perm->size<loco::DataType::S32>(4);
+ perm->rank(1);
+ perm->dim(0) = 4;
+ for (uint32_t i = 0; i < 4; i++)
+ perm->at<loco::DataType::S32>(i) = indices[i];
+ perm->shape_status(luci::ShapeStatus::VALID);
+
+ auto make_string = [](const std::vector<int32_t> &nums) {
+ std::string str;
+ for (auto num : nums)
+ {
+ if (str.length() > 0)
+ str += ".";
+ str += std::to_string(num);
+ }
+ return str;
+ };
+
+ auto str_indices = make_string(indices);
+
+ perm->name(name + "/Transpose_" + str_indices + "/perm");
+
+ auto trans = node->graph()->nodes()->create<luci::CircleTranspose>();
+ trans->perm(perm);
+ trans->name(name + "/Transpose_" + str_indices);
+ luci::add_origin(trans, luci::get_origin(node));
+
+ return trans;
+}
+
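+// Maps an NCHW axis to the corresponding NHWC axis (e.g. axis 1, the channel axis,
+// maps to 3). Negative axes are normalized into [0, 4) first.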
+int32_t nchw_axis_to_nhwc(int32_t axis)
+{
+ uint32_t pos_axis = axis >= 0 ? static_cast<uint32_t>(axis) : static_cast<uint32_t>(axis + 4);
+ static const uint32_t to_nhwc[4] = {0, 3, 1, 2};
+ if (pos_axis > 3)
+ throw std::runtime_error("Concat axis must be in range [-4, 4)");
+ return to_nhwc[pos_axis];
+}
+
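+// The "post" transpose (perm 0,3,1,2) converts a converted node's NHWC output back to
+// NCHW for its original consumers.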
+luci::CircleTranspose *create_post_transpose(luci::CircleNode *node)
+{
+ return create_4d_transpose(node, {0, 3, 1, 2});
+}
+
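+// The "pre" transpose (perm 0,2,3,1) converts an NCHW input into NHWC before the
+// converted operator.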
+luci::CircleTranspose *create_pre_transpose(luci::CircleNode *node)
+{
+ return create_4d_transpose(node, {0, 2, 3, 1});
+}
+
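+// Returns the flat row-major offset of a 4-D index within the given tensor shape.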
+uint32_t cal_offset(const loco::TensorShape &dimension, const uint32_t *indices)
+{
+ return indices[0] * dimension.dim(1).value() * dimension.dim(2).value() *
+ dimension.dim(3).value() +
+ indices[1] * dimension.dim(2).value() * dimension.dim(3).value() +
+ indices[2] * dimension.dim(3).value() + indices[3];
+}
+
+luci::CircleConst *create_NHWC_paddings(luci::CircleConst *paddings)
+{
+ // paddings shape is (4,2) (it was checked by is_NCHW)
+ assert(paddings != nullptr);
+ assert(paddings->rank() == 2);
+ assert(paddings->dim(0).value() == 4);
+ assert(paddings->dim(1).value() == 2);
+
+ // paddings for idx 0~3 are 0 (checked by is_NCHW)
+ assert(paddings->at<loco::DataType::S32>(0) == 0);
+ assert(paddings->at<loco::DataType::S32>(1) == 0);
+ assert(paddings->at<loco::DataType::S32>(2) == 0);
+ assert(paddings->at<loco::DataType::S32>(3) == 0);
+
+ auto name = paddings->name();
+ assert(name.length() > 0);
+
+ auto nhwc_paddings = paddings->graph()->nodes()->create<luci::CircleConst>();
+ nhwc_paddings->dtype(loco::DataType::S32);
+ nhwc_paddings->shape({4, 2});
+ nhwc_paddings->shape_status(luci::ShapeStatus::VALID);
+ nhwc_paddings->size<loco::DataType::S32>(4 * 2);
+ nhwc_paddings->name(name + "_NHWC");
+
+ for (uint32_t dim = 0; dim < 4; dim++)
+ {
+ for (uint32_t i = 0; i < 2; i++)
+ {
+ int32_t data = 0;
+
+ if (dim == 1)
+ {
+ // get third dimension (H in NCHW)
+ data = paddings->at<loco::DataType::S32>(2 * 2 + i);
+ }
+ else if (dim == 2)
+ {
+ // get fourth dimension (W in NCHW)
+ data = paddings->at<loco::DataType::S32>(3 * 2 + i);
+ }
+
+ nhwc_paddings->at<loco::DataType::S32>(dim * 2 + i) = data;
+ }
+ }
+ return nhwc_paddings;
+}
+
+luci::CircleConst *create_NHWC_from_NCHW(luci::CircleConst *constant)
+{
+ LOGGER(l);
+ assert(constant->rank() == 4);
+
+ // TODO: Support non-float types
+ if (constant->dtype() != loco::DataType::FLOAT32)
+ {
+ INFO(l) << "Non-float type constant: " << constant->name() << std::endl;
+ return nullptr;
+ }
+
+ loco::TensorShape nchw_dimension{constant->dim(0), constant->dim(1), constant->dim(2),
+ constant->dim(3)};
+ loco::TensorShape nhwc_dimension{constant->dim(0), constant->dim(2), constant->dim(3),
+ constant->dim(1)};
+
+ auto name = constant->name();
+ assert(name.length() > 0);
+
+ auto nhwc_const = constant->graph()->nodes()->create<luci::CircleConst>();
+ nhwc_const->dtype(constant->dtype());
+ nhwc_const->rank(4);
+ nhwc_const->dim(0).set(constant->dim(0).value());
+ nhwc_const->dim(1).set(constant->dim(2).value());
+ nhwc_const->dim(2).set(constant->dim(3).value());
+ nhwc_const->dim(3).set(constant->dim(1).value());
+ nhwc_const->shape_status(luci::ShapeStatus::VALID);
+ nhwc_const->size<loco::DataType::FLOAT32>(constant->size<loco::DataType::FLOAT32>());
+ nhwc_const->name(name + "_NHWC");
+
+ for (uint32_t n = 0; n < nchw_dimension.dim(0).value(); n++)
+ {
+ for (uint32_t c = 0; c < nchw_dimension.dim(1).value(); c++)
+ {
+ for (uint32_t h = 0; h < nchw_dimension.dim(2).value(); h++)
+ {
+ for (uint32_t w = 0; w < nchw_dimension.dim(3).value(); w++)
+ {
+ uint32_t nchw_indices[4] = {n, c, h, w};
+ uint32_t nhwc_indices[4] = {n, h, w, c};
+ auto data =
+ constant->at<loco::DataType::FLOAT32>(cal_offset(nchw_dimension, nchw_indices));
+ nhwc_const->at<loco::DataType::FLOAT32>(cal_offset(nhwc_dimension, nhwc_indices)) = data;
+ }
+ }
+ }
+ }
+ return nhwc_const;
+}
+
+// NOTE Following conditions can be extended later
+//
+// Find PAD with an NCHW pattern described below
+// - Paddings shape : [4, 2]
+// - Paddings value : [[0, 0], [0, 0], [h_t, h_b], [w_t, w_b]]]
+bool is_NCHW(const luci::CirclePad *node)
+{
+ const auto paddings = dynamic_cast<luci::CircleConst *>(node->paddings());
+ // Non-const paddings is not supported
+ if (paddings == nullptr)
+ return false;
+
+ if (paddings->rank() != 2)
+ return false;
+
+ if (paddings->dim(0).value() != 4 || paddings->dim(1).value() != 2)
+ return false;
+
+ // Only check the first two dimensions
+ for (uint32_t dim = 0; dim < 2; dim++)
+ {
+ for (uint32_t i = 0; i < 2; i++)
+ {
+ auto data = paddings->at<loco::DataType::S32>(dim * 2 + i);
+ if (data != 0)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// NOTE Following conditions can be extended later
+//
+// Find MUL with an NCHW pattern described below
+// - Input (non-constant) shape : [N, C, H, W]
+// - Input (constant) shape : [1, C, 1, 1]
+// - Output shape : [N, C, H, W]
+bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_node,
+ luci::CircleConst *&multiplier)
+{
+ auto x = dynamic_cast<luci::CircleConst *>(node->x());
+ auto y = dynamic_cast<luci::CircleConst *>(node->y());
+
+ if (x != nullptr && y == nullptr)
+ {
+ pred_node = loco::must_cast<luci::CircleNode *>(node->y());
+ multiplier = x;
+ }
+ else if (x == nullptr && y != nullptr)
+ {
+ pred_node = loco::must_cast<luci::CircleNode *>(node->x());
+ multiplier = y;
+ }
+ else
+ {
+    // Ignore unless exactly one of the inputs is a constant (the multiplier).
+ return false;
+ }
+
+ if (pred_node->rank() != 4)
+ return false;
+
+ const auto const_rank = multiplier->rank();
+ if (const_rank != 4)
+ return false;
+
+ for (uint32_t i = 0; i < const_rank; i++)
+ {
+ if (i != 1 && multiplier->dim(i).value() != 1)
+ return false;
+ }
+
+ const auto const_cdim = multiplier->dim(1);
+ const auto input_cdim = pred_node->dim(1);
+ const auto output_cdim = node->dim(1);
+
+ if (const_cdim == input_cdim && input_cdim == output_cdim)
+ return true;
+ else
+ return false;
+}
+
+// We assume ADD with const input is NCHW if,
+// Input shape: (N, C, H, W)
+// Output shape: (N, C, H, W)
+// 1. Const shape is (1, C, 1, 1)
+// 2. Input, Output, Const have the same C.
+bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_node,
+ luci::CircleConst *&beta)
+{
+ auto x = dynamic_cast<luci::CircleConst *>(node->x());
+ auto y = dynamic_cast<luci::CircleConst *>(node->y());
+
+ if (x != nullptr && y == nullptr)
+ {
+ pred_node = loco::must_cast<luci::CircleNode *>(node->y());
+ beta = x;
+ }
+ else if (x == nullptr && y != nullptr)
+ {
+ pred_node = loco::must_cast<luci::CircleNode *>(node->x());
+ beta = y;
+ }
+ else
+ {
+    // Ignore unless exactly one of the inputs is a constant.
+ return false;
+ }
+
+ if (pred_node->rank() != 4)
+ return false;
+
+ const auto const_rank = beta->rank();
+ if (const_rank != 4)
+ return false;
+
+ // Check the shape is (1, C, 1, 1)
+ for (uint32_t i = 0; i < const_rank; i++)
+ {
+ if (i == 1)
+ continue;
+
+ if (beta->dim(i).value() != 1)
+ return false;
+ }
+
+ const auto const_cdim = beta->dim(1);
+ const auto input_cdim = pred_node->dim(1);
+ const auto output_cdim = node->dim(1);
+
+ // Check Input, Output, Const have the same channel size
+ if (const_cdim == input_cdim && input_cdim == output_cdim)
+ return true;
+ else
+ return false;
+}
+
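+// Converts a feature-wise unary op (Relu, Relu6, LeakyRelu, ...) by inserting a
+// pre-transpose on its input and a post-transpose in front of its users.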
+template <class T> bool convert_unary_features(T *node)
+{
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->features());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->features(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+}
+
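+// Visitor that rewrites a single NCHW operator into its NHWC form; returns true on success.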
+class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
+{
+ // Default
+ bool visit(luci::CircleNode *node)
+ {
+ throw std::runtime_error(node->name() + " is an unsupported operator.");
+ }
+
+ bool visit(luci::CircleInput *node)
+ {
+ const auto n = node->dim(0);
+ const auto c = node->dim(1);
+ const auto h = node->dim(2);
+ const auto w = node->dim(3);
+
+ node->dim(1) = h;
+ node->dim(2) = w;
+ node->dim(3) = c;
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+    // Insert post-transpose
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ // Update graph input
+ auto graph_inputs = node->graph()->inputs();
+ auto graph_input = graph_inputs->at(node->index());
+ graph_input->shape({n, h, w, c});
+
+ return true;
+ }
+
+ bool visit(luci::CircleOutput *node)
+ {
+ // Insert pre-transpose
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(node->from());
+
+ node->from(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ // Update graph output
+ const auto n = node->dim(0).value();
+ const auto c = node->dim(1).value();
+ const auto h = node->dim(2).value();
+ const auto w = node->dim(3).value();
+
+ auto graph_outputs = node->graph()->outputs();
+ auto graph_output = graph_outputs->at(node->index());
+ graph_output->shape({n, h, w, c});
+
+ return true;
+ }
+
+ bool visit(luci::CircleAdd *node)
+ {
+ luci::CircleNode *pred_node = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ if (is_NCHW_with_const(node, pred_node, beta))
+ {
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+
+ auto nhwc_const = create_NHWC_from_NCHW(beta);
+ if (nhwc_const == nullptr)
+ return false;
+
+ node->x(pre_trans);
+ node->y(nhwc_const);
+ }
+ else if (beta == nullptr)
+ {
+ // Both inputs are not constant.
+ // In this case, we cannot distinguish NCHW from NHWC,
+ // so just insert Transpose Ops.
+ auto pre_trans_x = create_pre_transpose(node);
+ pre_trans_x->a(node->x());
+ node->x(pre_trans_x);
+
+ auto pre_trans_y = create_pre_transpose(node);
+ pre_trans_y->a(node->y());
+ node->y(pre_trans_y);
+ }
+ else
+ {
+ return false;
+ }
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+ return true;
+ }
+
+ bool visit(luci::CircleConcatenation *node)
+ {
+ const auto num_values = node->numValues();
+ for (uint32_t i = 0; i < num_values; i++)
+ {
+ auto pred_node = loco::must_cast<luci::CircleNode *>(node->values(i));
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->values(i, pre_trans);
+ }
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ node->axis(nchw_axis_to_nhwc(node->axis()));
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ bool visit(luci::CircleLeakyRelu *node)
+ {
+ return convert_unary_features<luci::CircleLeakyRelu>(node);
+ }
+
+ bool visit(luci::CircleMul *node)
+ {
+ LOGGER(l);
+
+ luci::CircleNode *pred_node = nullptr;
+ luci::CircleConst *multiplier = nullptr;
+
+ if (is_NCHW_with_const(node, pred_node, multiplier))
+ {
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->x(pre_trans);
+
+ auto nhwc_const = create_NHWC_from_NCHW(multiplier);
+ node->y(nhwc_const);
+ }
+ else if (multiplier == nullptr)
+ {
+      // TODO: Implement this case.
+ INFO(l) << "Not yet implemented. Both inputs of MUL are non-const." << std::endl;
+ return false;
+ }
+ else
+ {
+ return false;
+ }
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+ return true;
+ }
+
+ bool visit(luci::CircleNeg *node)
+ {
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->x());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->x(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ bool visit(luci::CirclePad *node)
+ {
+ if (!is_NCHW(node))
+ return false;
+
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->input());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->input(pre_trans);
+
+ auto nchw_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+ const auto nhwc_paddings = create_NHWC_paddings(nchw_paddings);
+ node->paddings(nhwc_paddings);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ bool visit(luci::CircleRelu *node) { return convert_unary_features<luci::CircleRelu>(node); }
+
+ bool visit(luci::CircleRelu6 *node) { return convert_unary_features<luci::CircleRelu6>(node); }
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ INFO(l) << "ConvertNCHWToNHWCPass Start" << std::endl;
+
+ // Annotate NCHW operators
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ switch (circle_node->opcode())
+ {
+ // List of supported Ops
+ case luci::CircleOpcode::CIRCLEINPUT:
+ if (!_preserve_input && !has_data_format(node))
+ {
+ set_data_format(node, DataFormat::NCHW);
+ }
+ break;
+ case luci::CircleOpcode::CIRCLEOUTPUT:
+ if (!_preserve_output && !has_data_format(node))
+ {
+ set_data_format(node, DataFormat::NCHW);
+ }
+ break;
+ case luci::CircleOpcode::ADD:
+ case luci::CircleOpcode::CONCATENATION:
+ case luci::CircleOpcode::LEAKY_RELU:
+ case luci::CircleOpcode::MUL:
+ case luci::CircleOpcode::NEG:
+ case luci::CircleOpcode::PAD:
+ case luci::CircleOpcode::RELU:
+ case luci::CircleOpcode::RELU6:
+ if (!has_data_format(node))
+ {
+ set_data_format(node, DataFormat::NCHW);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (!has_data_format(node))
+ {
+ // Unsupported Op
+ continue;
+ }
+ else if (get_data_format(node) == DataFormat::NHWC)
+ {
+ // Already converted to NHWC
+ continue;
+ }
+ else if (has_dynamic_shape(node))
+ {
+ // This pass only works for static-shaped node
+ INFO(l) << "Skip the node with a dynamic shape." << std::endl;
+ continue;
+ }
+ else
+ {
+ ConvertNCHWToNHWC converter;
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (circle_node->rank() != 4)
+ continue;
+
+ if (circle_node->accept(&converter))
+ {
+ set_data_format(node, DataFormat::NHWC);
+ changed = true;
+ }
+ else
+ {
+ continue;
+ }
+ }
+ }
+
+ INFO(l) << "ConvertNCHWToNHWCPass End" << std::endl;
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/Phase.h>
+
+#include "luci/Pass/ConvertNCHWToNHWCPass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Graph with a single Op (example: Add).
+ *
+ * BEFORE
+ * - All Ops including Input/Output are NCHW.
+ *
+ * [Input] [beta]
+ * | /
+ * [Add]
+ * |
+ * [Output]
+ *
+ * AFTER
+ * - All Ops including Input/Output are NHWC.
+ *
+ * [Input]
+ * |
+ * [Transpose]
+ * |
+ * [Transpose] [beta]
+ * | /
+ * [Add]
+ * |
+ * [Transpose]
+ * |
+ * [Transpose]
+ * |
+ * [Output]
+ */
+class SimpleGraph
+{
+public:
+ SimpleGraph() = default;
+
+public:
+ void init()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ output = g.nodes()->create<luci::CircleOutput>();
+ input->name("input");
+ output->name("output");
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ graph_input->dtype(loco::DataType::FLOAT32);
+ input->dtype(loco::DataType::FLOAT32);
+ output->dtype(loco::DataType::FLOAT32);
+ graph_output->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ graph_input->shape({1, channel_size, 4, 4});
+ input->shape({1, channel_size, 4, 4});
+ output->shape({1, channel_size, 4, 4});
+ graph_output->shape({1, channel_size, 4, 4});
+
+ auto graph_body = insertGraphBody(input);
+ output->from(graph_body);
+ }
+
+ virtual ~SimpleGraph() = default;
+
+protected:
+ virtual loco::Node *insertGraphBody(loco::Node *input) = 0;
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class AddGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ add = g.nodes()->create<luci::CircleAdd>();
+ beta = g.nodes()->create<luci::CircleConst>();
+
+ add->dtype(loco::DataType::FLOAT32);
+ beta->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ add->shape({1, channel_size, 4, 4});
+ beta->shape({1, channel_size, 1, 1});
+
+ beta->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ beta->at<loco::DataType::FLOAT32>(i) = i;
+ }
+
+ add->x(input);
+ add->y(beta);
+
+ add->name("add");
+ beta->name("beta");
+
+ return add;
+ }
+
+public:
+ luci::CircleAdd *add = nullptr;
+ luci::CircleConst *beta = nullptr;
+};
+
+class ConcatenationGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ concat = g.nodes()->create<luci::CircleConcatenation>(2);
+ concat->values(0, input);
+ concat->axis(1);
+
+ input2 = g.nodes()->create<luci::CircleConst>();
+ input2->dtype(loco::DataType::FLOAT32);
+ input2->shape({1, 16, 4, 4});
+ input2->size<loco::DataType::FLOAT32>(16 * 4 * 4);
+ for (uint32_t i = 0; i < 16 * 4 * 4; i++)
+ {
+ input2->at<loco::DataType::FLOAT32>(i) = i;
+ }
+ concat->values(1, input2);
+
+ concat->name("concat");
+ input2->name("input2");
+
+ return concat;
+ }
+
+public:
+ luci::CircleConcatenation *concat = nullptr;
+ luci::CircleConst *input2 = nullptr;
+};
+
+class LeakyReluGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ leakyrelu = g.nodes()->create<luci::CircleLeakyRelu>();
+ leakyrelu->features(input);
+ leakyrelu->name("leakyrelu");
+
+ return leakyrelu;
+ }
+
+public:
+ luci::CircleLeakyRelu *leakyrelu = nullptr;
+};
+
+class MulGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ mul = g.nodes()->create<luci::CircleMul>();
+ multiplier = g.nodes()->create<luci::CircleConst>();
+
+ mul->dtype(loco::DataType::FLOAT32);
+ multiplier->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ mul->shape({1, channel_size, 4, 4});
+ multiplier->shape({1, channel_size, 1, 1});
+
+ multiplier->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ multiplier->at<loco::DataType::FLOAT32>(i) = i;
+ }
+
+ mul->x(input);
+ mul->y(multiplier);
+
+ mul->name("mul");
+ multiplier->name("multiplier");
+
+ return mul;
+ }
+
+public:
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *multiplier = nullptr;
+};
+
+class NegGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ neg = g.nodes()->create<luci::CircleNeg>();
+ neg->x(input);
+ neg->name("neg");
+
+ return neg;
+ }
+
+public:
+ luci::CircleNeg *neg = nullptr;
+};
+
+class PadGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ pad = g.nodes()->create<luci::CirclePad>();
+ paddings = g.nodes()->create<luci::CircleConst>();
+
+ pad->dtype(loco::DataType::FLOAT32);
+ paddings->dtype(loco::DataType::S32);
+
+ uint32_t channel_size = 16;
+ pad->shape({1, channel_size, 4, 4});
+ paddings->shape({4, 2});
+
+ // paddings data (NCHW)
+ // [[0,0], [0,0], [1,1], [2,2]]
+ paddings->size<loco::DataType::S32>(8);
+ for (uint32_t dim = 0; dim < 4; dim++)
+ {
+ for (uint32_t i = 0; i < 2; i++)
+ {
+ int32_t data = 0;
+
+ if (dim == 2)
+ data = 1;
+ else if (dim == 3)
+ data = 2;
+
+ paddings->at<loco::DataType::S32>(dim * 2 + i) = data;
+ }
+ }
+
+ pad->input(input);
+ pad->paddings(paddings);
+
+ pad->name("pad");
+ paddings->name("paddings");
+
+ return pad;
+ }
+
+public:
+ luci::CirclePad *pad = nullptr;
+ luci::CircleConst *paddings = nullptr;
+};
+
+class ReluGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ relu = g.nodes()->create<luci::CircleRelu>();
+ relu->features(input);
+ relu->name("Relu");
+
+ return relu;
+ }
+
+public:
+ luci::CircleRelu *relu = nullptr;
+};
+
+class Relu6Graph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ relu6 = g.nodes()->create<luci::CircleRelu6>();
+ relu6->features(input);
+ relu6->name("relu6");
+
+ return relu6;
+ }
+
+public:
+ luci::CircleRelu6 *relu6 = nullptr;
+};
+
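+// Checks that `node` is a Transpose with constant perm (0,2,3,1), i.e. NCHW-to-NHWC.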
+void check_pre_trans(loco::Node *node)
+{
+ auto pre_trans = dynamic_cast<luci::CircleTranspose *>(node);
+ EXPECT_NE(nullptr, pre_trans);
+ auto pre_trans_perm = dynamic_cast<luci::CircleConst *>(pre_trans->perm());
+ EXPECT_NE(nullptr, pre_trans_perm);
+ EXPECT_EQ(1, pre_trans_perm->rank());
+ EXPECT_EQ(4, pre_trans_perm->dim(0).value());
+ EXPECT_EQ(loco::DataType::S32, pre_trans_perm->dtype());
+ EXPECT_EQ(0, pre_trans_perm->at<loco::DataType::S32>(0));
+ EXPECT_EQ(2, pre_trans_perm->at<loco::DataType::S32>(1));
+ EXPECT_EQ(3, pre_trans_perm->at<loco::DataType::S32>(2));
+ EXPECT_EQ(1, pre_trans_perm->at<loco::DataType::S32>(3));
+}
+
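+// Checks that `node` is a Transpose with constant perm (0,3,1,2), i.e. NHWC-to-NCHW.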
+void check_post_trans(loco::Node *node)
+{
+ auto post_trans = dynamic_cast<luci::CircleTranspose *>(node);
+ EXPECT_NE(nullptr, post_trans);
+ auto post_trans_perm = dynamic_cast<luci::CircleConst *>(post_trans->perm());
+ EXPECT_NE(nullptr, post_trans_perm);
+ EXPECT_EQ(1, post_trans_perm->rank());
+ EXPECT_EQ(4, post_trans_perm->dim(0).value());
+ EXPECT_EQ(loco::DataType::S32, post_trans_perm->dtype());
+ EXPECT_EQ(0, post_trans_perm->at<loco::DataType::S32>(0));
+ EXPECT_EQ(3, post_trans_perm->at<loco::DataType::S32>(1));
+ EXPECT_EQ(1, post_trans_perm->at<loco::DataType::S32>(2));
+ EXPECT_EQ(2, post_trans_perm->at<loco::DataType::S32>(3));
+}
+
+void run_phase(loco::Graph *g, bool preserve_input, bool preserve_output)
+{
+ logo::Phase phase;
+
+ // Default passes.
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+
+ // Pass to test
+ phase.emplace_back(
+ std::make_unique<luci::ConvertNCHWToNHWCPass>(preserve_input, preserve_output));
+
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+ phase_runner.run(phase);
+}
+
+} // namespace
+
+TEST(ConvertNCHWToNHWCPassTest, name)
+{
+ luci::ConvertNCHWToNHWCPass pass(false, false);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ConvertNCHWToNHWC, Add)
+{
+ AddGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.add->x());
+
+ auto add_succs = loco::succs(g.add);
+ EXPECT_EQ(1, add_succs.size());
+ check_post_trans(*add_succs.begin());
+
+ uint32_t channel_size = 16;
+ auto new_beta = dynamic_cast<luci::CircleConst *>(g.add->y());
+ EXPECT_NE(nullptr, new_beta);
+ EXPECT_EQ(4, new_beta->rank());
+ EXPECT_EQ(1, new_beta->dim(0).value());
+ EXPECT_EQ(1, new_beta->dim(1).value());
+ EXPECT_EQ(1, new_beta->dim(2).value());
+ EXPECT_EQ(channel_size, new_beta->dim(3).value());
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, Concatenation)
+{
+ ConcatenationGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.concat->values(0));
+ check_pre_trans(g.concat->values(1));
+
+ auto concat_succs = loco::succs(g.concat);
+ EXPECT_EQ(1, concat_succs.size());
+ check_post_trans(*concat_succs.begin());
+
+ // Check concat shape, axis
+ EXPECT_EQ(1, g.concat->dim(0).value());
+ EXPECT_EQ(4, g.concat->dim(1).value());
+ EXPECT_EQ(4, g.concat->dim(2).value());
+ EXPECT_EQ(32, g.concat->dim(3).value());
+ EXPECT_EQ(3, g.concat->axis());
+}
+
+TEST(ConvertNCHWToNHWC, LeakyRelu)
+{
+ LeakyReluGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.leakyrelu->features());
+
+ auto leakyrelu_succs = loco::succs(g.leakyrelu);
+ EXPECT_EQ(1, leakyrelu_succs.size());
+ check_post_trans(*leakyrelu_succs.begin());
+
+ // Check leakyrelu shape
+ EXPECT_EQ(1, g.leakyrelu->dim(0).value());
+ EXPECT_EQ(4, g.leakyrelu->dim(1).value());
+ EXPECT_EQ(4, g.leakyrelu->dim(2).value());
+ EXPECT_EQ(16, g.leakyrelu->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, Mul)
+{
+ MulGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.mul->x());
+
+ auto mul_succs = loco::succs(g.mul);
+ EXPECT_EQ(1, mul_succs.size());
+ check_post_trans(*mul_succs.begin());
+
+ uint32_t channel_size = 16;
+ auto new_multiplier = dynamic_cast<luci::CircleConst *>(g.mul->y());
+ EXPECT_NE(nullptr, new_multiplier);
+ EXPECT_EQ(4, new_multiplier->rank());
+ EXPECT_EQ(1, new_multiplier->dim(0).value());
+ EXPECT_EQ(1, new_multiplier->dim(1).value());
+ EXPECT_EQ(1, new_multiplier->dim(2).value());
+ EXPECT_EQ(channel_size, new_multiplier->dim(3).value());
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, Neg)
+{
+ NegGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.neg->x());
+
+ auto neg_succs = loco::succs(g.neg);
+ EXPECT_EQ(1, neg_succs.size());
+ check_post_trans(*neg_succs.begin());
+
+  // Check neg shape
+ EXPECT_EQ(1, g.neg->dim(0).value());
+ EXPECT_EQ(4, g.neg->dim(1).value());
+ EXPECT_EQ(4, g.neg->dim(2).value());
+ EXPECT_EQ(16, g.neg->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, Pad)
+{
+ PadGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.pad->input());
+
+ auto pad_succs = loco::succs(g.pad);
+ EXPECT_EQ(1, pad_succs.size());
+ check_post_trans(*pad_succs.begin());
+
+ auto new_paddings = dynamic_cast<luci::CircleConst *>(g.pad->paddings());
+ EXPECT_NE(nullptr, new_paddings);
+ EXPECT_EQ(2, new_paddings->rank());
+ EXPECT_EQ(4, new_paddings->dim(0).value());
+ EXPECT_EQ(2, new_paddings->dim(1).value());
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(0));
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(1));
+ EXPECT_EQ(1, new_paddings->at<loco::DataType::S32>(2));
+ EXPECT_EQ(1, new_paddings->at<loco::DataType::S32>(3));
+ EXPECT_EQ(2, new_paddings->at<loco::DataType::S32>(4));
+ EXPECT_EQ(2, new_paddings->at<loco::DataType::S32>(5));
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(6));
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(7));
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, Unknown_Shape_NEG)
+{
+ AddGraph g;
+ g.init();
+
+ // Unknown shape
+ g.input->dim(0).unset();
+ g.add->dim(0).unset();
+ g.output->dim(0).unset();
+
+ luci::ConvertNCHWToNHWCPass pass(false, false);
+ EXPECT_EQ(false, pass.run(&g.g));
+}
+
+TEST(ConvertNCHWToNHWC, Preserve_Input_Output)
+{
+ // Preserve input
+ {
+ AddGraph g;
+ g.init();
+
+ run_phase(&g.g, true, false);
+
+ // Check input shape
+ EXPECT_EQ(1, g.input->dim(0).value());
+ EXPECT_EQ(16, g.input->dim(1).value());
+ EXPECT_EQ(4, g.input->dim(2).value());
+ EXPECT_EQ(4, g.input->dim(3).value());
+
+ // Check output shape
+ EXPECT_EQ(1, g.output->dim(0).value());
+ EXPECT_EQ(4, g.output->dim(1).value());
+ EXPECT_EQ(4, g.output->dim(2).value());
+ EXPECT_EQ(16, g.output->dim(3).value());
+ }
+
+ // Preserve output
+ {
+ AddGraph g;
+ g.init();
+
+ run_phase(&g.g, false, true);
+
+ // Check input shape
+ EXPECT_EQ(1, g.input->dim(0).value());
+ EXPECT_EQ(4, g.input->dim(1).value());
+ EXPECT_EQ(4, g.input->dim(2).value());
+ EXPECT_EQ(16, g.input->dim(3).value());
+
+ // Check output shape
+ EXPECT_EQ(1, g.output->dim(0).value());
+ EXPECT_EQ(16, g.output->dim(1).value());
+ EXPECT_EQ(4, g.output->dim(2).value());
+ EXPECT_EQ(4, g.output->dim(3).value());
+ }
+
+ // Preserve both input and output
+ {
+ AddGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ // Check input shape
+ EXPECT_EQ(1, g.input->dim(0).value());
+ EXPECT_EQ(16, g.input->dim(1).value());
+ EXPECT_EQ(4, g.input->dim(2).value());
+ EXPECT_EQ(4, g.input->dim(3).value());
+
+ // Check output shape
+ EXPECT_EQ(1, g.output->dim(0).value());
+ EXPECT_EQ(16, g.output->dim(1).value());
+ EXPECT_EQ(4, g.output->dim(2).value());
+ EXPECT_EQ(4, g.output->dim(3).value());
+ }
+}
+
+TEST(ConvertNCHWToNHWC, Relu)
+{
+ ReluGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.relu->features());
+
+ auto relu_succs = loco::succs(g.relu);
+ EXPECT_EQ(1, relu_succs.size());
+ check_post_trans(*relu_succs.begin());
+
+ // Check relu shape
+ EXPECT_EQ(1, g.relu->dim(0).value());
+ EXPECT_EQ(4, g.relu->dim(1).value());
+ EXPECT_EQ(4, g.relu->dim(2).value());
+ EXPECT_EQ(16, g.relu->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, Relu6)
+{
+ Relu6Graph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.relu6->features());
+
+ auto relu6_succs = loco::succs(g.relu6);
+ EXPECT_EQ(1, relu6_succs.size());
+ check_post_trans(*relu6_succs.begin());
+
+ // Check relu6 shape
+ EXPECT_EQ(1, g.relu6->dim(0).value());
+ EXPECT_EQ(4, g.relu6->dim(1).value());
+ EXPECT_EQ(4, g.relu6->dim(2).value());
+ EXPECT_EQ(16, g.relu6->dim(3).value());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldAddV2Pass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <iostream>
+
+namespace
+{
+
+bool same_shape(const luci::CircleConst *x, const luci::CircleConst *y)
+{
+ if (x->rank() != y->rank())
+ return false;
+
+ for (uint32_t i = 0; i < x->rank(); i++)
+ {
+ if (!(x->dim(i) == y->dim(i)))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * Fold AddV2 to const if both inputs are const
+ **/
+template <loco::DataType T> bool fold_add_v2(luci::CircleCustom *add_v2)
+{
+ // This should hold for AddV2
+ if (add_v2->numInputs() != 2)
+ return false;
+
+ // Check first input is const
+ auto x = dynamic_cast<luci::CircleConst *>(add_v2->inputs(0));
+ if (not x)
+ return false;
+
+ // Check second input is const
+ auto y = dynamic_cast<luci::CircleConst *>(add_v2->inputs(1));
+ if (not y)
+ return false;
+
+ if (x->dtype() != y->dtype())
+ return false;
+
+ if (!same_shape(x, y))
+ return false;
+
+ auto name_x = x->name();
+ auto name_y = y->name();
+ assert(name_x.length() > 0);
+ assert(name_y.length() > 0);
+ auto constant = add_v2->graph()->nodes()->create<luci::CircleConst>();
+ constant->dtype(x->dtype());
+ constant->rank(x->rank());
+ for (uint32_t i = 0; i < x->rank(); i++)
+ constant->dim(i).set(x->dim(i).value());
+
+ const auto size = x->size<T>();
+ constant->size<T>(size);
+ for (uint32_t i = 0; i < size; i++)
+ constant->at<T>(i) = x->at<T>(i) + y->at<T>(i);
+
+ constant->shape_status(luci::ShapeStatus::VALID);
+ constant->name(name_x + ";" + name_y);
+
+ for (auto succ : loco::succs(add_v2))
+ {
+ auto custom_out = loco::must_cast<luci::CircleCustomOut *>(succ);
+ loco::replace(custom_out).with(constant);
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for AddV2 Op
+ **/
+bool FoldAddV2Pass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto custom = dynamic_cast<luci::CircleCustom *>(node))
+ {
+ if (custom->custom_code() == "AddV2")
+ {
+ // TODO: Support more data types
+ if (custom->dtype() == loco::DataType::S64)
+ {
+ if (fold_add_v2<loco::DataType::S64>(custom))
+ changed = true;
+ }
+ }
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldAddV2Pass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Graph has an AddV2 Op with constant inputs
+ *
+ * BEFORE
+ *
+ * [CircleConst] [CircleConst]
+ * | |
+ * [CircleCustom (AddV2)]
+ * |
+ * [CircleCustomOut]
+ *
+ * AFTER
+ *
+ * [CircleConst]
+ */
+template <loco::DataType T> class FoldAddV2Test : public luci::ConstantFoldingAddTestGraph
+{
+public:
+ FoldAddV2Test(std::initializer_list<uint32_t> shape) : luci::ConstantFoldingAddTestGraph(shape, T)
+ {
+ _addV2 = _g.nodes()->create<luci::CircleCustom>(2, 1);
+ _x = _g.nodes()->create<luci::CircleConst>();
+ _y = _g.nodes()->create<luci::CircleConst>();
+ _addV2_out = _g.nodes()->create<luci::CircleCustomOut>();
+
+ _addV2->dtype(T);
+ _x->dtype(T);
+ _y->dtype(T);
+ _addV2_out->dtype(T);
+
+ _addV2->shape(shape);
+ _x->shape(shape);
+ _y->shape(shape);
+ _addV2_out->shape(shape);
+
+ uint32_t num_elems = 1;
+ for (auto dim = shape.begin(); dim != shape.end(); dim++)
+ num_elems *= *dim;
+
+ _x->size<T>(num_elems);
+ _y->size<T>(num_elems);
+
+ for (uint32_t i = 0; i < num_elems; i++)
+ {
+ _x->at<T>(i) = i + 1;
+ _y->at<T>(i) = i + 1;
+ }
+
+ _addV2->custom_code("AddV2");
+ _addV2->inputs(0, _x);
+ _addV2->inputs(1, _y);
+ _addV2_out->input(_addV2);
+
+ _addV2->name("addV2");
+ _x->name("x");
+ _y->name("y");
+ }
+
+ loco::Node *createFoldedPattern() override { return _addV2_out; }
+
+ virtual ~FoldAddV2Test() = default;
+
+protected:
+ luci::CircleCustom *_addV2 = nullptr;
+ luci::CircleCustomOut *_addV2_out = nullptr;
+ luci::CircleConst *_x = nullptr;
+ luci::CircleConst *_y = nullptr;
+};
+
+class FoldS64AddV2Test : public FoldAddV2Test<loco::DataType::S64>, public ::testing::Test
+{
+public:
+ FoldS64AddV2Test() : FoldAddV2Test<loco::DataType::S64>({3}) {}
+
+ virtual void SetUp() { init(); }
+};
+
+} // namespace
+
+TEST(FoldAddV2PassTest, name)
+{
+ luci::FoldAddV2Pass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FoldS64AddV2Test, fold_addV2)
+{
+ luci::FoldAddV2Pass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::S64, folded_const->dtype());
+ EXPECT_EQ(1, folded_const->rank());
+ EXPECT_EQ(3, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(0));
+ EXPECT_EQ(4, folded_const->at<loco::DataType::S64>(1));
+ EXPECT_EQ(6, folded_const->at<loco::DataType::S64>(2));
+}
+
+TEST_F(FoldS64AddV2Test, input_type_mismatch_NEG)
+{
+ _x->dtype(loco::DataType::S32);
+
+ luci::FoldAddV2Pass pass;
+ EXPECT_FALSE(pass.run(graph()));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldCastPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
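+// Creates a constant of `to_dtype` with the same shape as `node`, casting each element.
+// Only S64 -> S32 is supported for now; other combinations return nullptr.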
+luci::CircleConst *cast_const(luci::CircleConst *node, loco::DataType from_dtype,
+ loco::DataType to_dtype)
+{
+ assert(node->dtype() == from_dtype);
+
+ auto name = node->name();
+ assert(name.length() > 0);
+ auto constant = node->graph()->nodes()->create<luci::CircleConst>();
+ constant->dtype(to_dtype);
+ constant->rank(node->rank());
+ uint32_t num_elems = 1;
+ for (uint32_t i = 0; i < node->rank(); i++)
+ {
+ constant->dim(i).set(node->dim(i).value());
+ num_elems *= node->dim(i).value();
+ }
+
+ constant->shape_status(luci::ShapeStatus::VALID);
+
+ // TODO: Support more data types
+ if (from_dtype == loco::DataType::S64)
+ {
+ if (to_dtype == loco::DataType::S32)
+ {
+ constant->size<loco::DataType::S32>(num_elems);
+ for (uint32_t i = 0; i < num_elems; i++)
+ constant->at<loco::DataType::S32>(i) =
+ static_cast<int32_t>(node->at<loco::DataType::S64>(i));
+
+ constant->name(name + "_S32");
+ return constant;
+ }
+ return nullptr;
+ }
+
+ return nullptr;
+}
+
+/**
+ * Fold Cast to const if it has const input
+ **/
+bool fold_cast(luci::CircleCast *cast)
+{
+ // Check cast has const input
+ auto const_x = dynamic_cast<luci::CircleConst *>(cast->x());
+ if (not const_x)
+ return false;
+
+ const auto in_dtype = const_x->dtype();
+ const auto out_dtype = cast->dtype();
+
+ auto casted_const = cast_const(const_x, in_dtype, out_dtype);
+ if (not casted_const)
+ return false;
+
+ loco::replace(cast).with(casted_const);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for Cast Op
+ **/
+bool FoldCastPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto cast = dynamic_cast<luci::CircleCast *>(node))
+ {
+ if (fold_cast(cast))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldCastPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+template <loco::DataType FromT, loco::DataType ToT>
+class FoldCastTest : public luci::ConstantFoldingAddTestGraph
+{
+public:
+ FoldCastTest(std::initializer_list<uint32_t> shape)
+ : luci::ConstantFoldingAddTestGraph(shape, ToT)
+ {
+ _cast = _g.nodes()->create<luci::CircleCast>();
+ _x = _g.nodes()->create<luci::CircleConst>();
+
+ _cast->dtype(ToT);
+ _x->dtype(FromT);
+
+ _cast->shape(shape);
+ _x->shape(shape);
+
+ uint32_t num_elems = 1;
+ for (auto dim = shape.begin(); dim != shape.end(); dim++)
+ num_elems *= *dim;
+
+ _x->size<FromT>(num_elems);
+ for (uint32_t i = 0; i < num_elems; i++)
+ _x->at<FromT>(i) = i + 1;
+
+ _cast->x(_x);
+
+ _cast->name("cast");
+ _x->name("x");
+ }
+
+ loco::Node *createFoldedPattern() override { return _cast; }
+
+protected:
+ luci::CircleCast *_cast = nullptr;
+ luci::CircleConst *_x = nullptr;
+};
+
+/**
+ * Graph that has a Cast Op with constant input
+ *
+ * BEFORE
+ *
+ * [CircleConst]
+ * |
+ * [Cast]
+ *
+ * AFTER
+ *
+ * [CircleConst]
+ *
+ */
+class FoldS64ToS32CastTest : public FoldCastTest<loco::DataType::S64, loco::DataType::S32>,
+ public ::testing::Test
+{
+public:
+ FoldS64ToS32CastTest() : FoldCastTest<loco::DataType::S64, loco::DataType::S32>({3}) {}
+
+ virtual void SetUp() { init(); }
+};
+
+} // namespace
+
+TEST(FoldCastPassTest, name)
+{
+ luci::FoldCastPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FoldS64ToS32CastTest, fold_cast_s64_to_s32)
+{
+ luci::FoldCastPass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::S32, folded_const->dtype());
+ EXPECT_EQ(1, folded_const->rank());
+ EXPECT_EQ(3, folded_const->dim(0).value());
+ EXPECT_EQ(1, folded_const->at<loco::DataType::S32>(0));
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S32>(1));
+ EXPECT_EQ(3, folded_const->at<loco::DataType::S32>(2));
+}
#include "luci/Pass/FoldDequantizePass.h"
#include <luci/IR/CircleNodes.h>
-
-#include <loco/Service/TypeInference.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace
{
throw std::runtime_error("Given constant node has no quantization parameter");
}
+ auto name = const_node->name();
+ assert(name.length() > 0);
auto g = const_node->graph();
auto new_const_node = g->nodes()->create<luci::CircleConst>();
}
new_const_node->size<loco::DataType::FLOAT32>(dim_size);
new_const_node->shape_status(luci::ShapeStatus::VALID);
+ new_const_node->name(name + "_DQ");
const int32_t q_dim = const_node->quantparam()->quantized_dimension;
const int32_t q_dim_value = const_node->dim(q_dim).value();
qd = 0;
new_const_node->at<loco::DataType::FLOAT32>(i) =
- (float)(const_node->at<loco::DataType::S8>(i) - const_node->quantparam()->zerop.at(qd)) *
- const_node->quantparam()->scale.at(qd);
+ (float)(const_node->at<loco::DataType::S8>(i) - const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
}
}
else
qd = 0;
new_const_node->at<loco::DataType::FLOAT32>(i) =
- (float)((int)const_node->at<loco::DataType::U8>(i) -
- const_node->quantparam()->zerop.at(qd)) *
- const_node->quantparam()->scale.at(qd);
+ (float)((int)const_node->at<loco::DataType::U8>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
}
}
if (replace_const_node(const_node_user, const_node))
{
loco::replace(dequant).with(const_node_user);
+ luci::add_origin(loco::must_cast<luci::CircleNode *>(const_node_user),
+ luci::get_origin(dequant));
changed = true;
}
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDequantizePass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FoldDequantizePassTest, name)
+{
+ luci::FoldDequantizePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldSparseToDensePass.h"
+#include "CircleOptimizerUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+/**
+ * Fold to const if
+ *
+ * 1. indices has 0-sized static shape such as [0]
+ * (i.e., output is filled with default value)
+ * 2. default_value: const scalar
+ * 3. output_shape: const
+ *
+ * TODO: Support more general patterns
+ **/
+template <loco::DataType IndexT, loco::DataType ValueT>
+bool fold_sparse_to_dense(luci::CircleSparseToDense *stod)
+{
+ const auto indices = loco::must_cast<luci::CircleNode *>(stod->indices());
+ const auto default_value = loco::must_cast<luci::CircleConst *>(stod->default_value());
+ const auto output_shape = loco::must_cast<luci::CircleConst *>(stod->output_shape());
+
+ bool has_zero = false;
+ for (uint32_t i = 0; i < indices->rank(); i++)
+ {
+ if (indices->dim(i).known() && indices->dim(i).value() == 0)
+ has_zero = true;
+ }
+ if (!has_zero)
+ return false;
+
+ if (default_value->rank() != 0 || default_value->size<ValueT>() != 1)
+ return false;
+
+ auto rank = output_shape->size<IndexT>();
+ std::vector<uint32_t> shape;
+ for (uint32_t i = 0; i < rank; i++)
+ {
+ auto dim = output_shape->at<IndexT>(i);
+ assert(dim >= 0 && dim <= std::numeric_limits<uint32_t>::max());
+ if (!(dim >= 0 && dim <= std::numeric_limits<uint32_t>::max()))
+ return false;
+
+ shape.push_back(dim);
+ }
+
+ auto name = stod->name();
+ assert(name.length() > 0);
+ auto constant = stod->graph()->nodes()->create<luci::CircleConst>();
+ constant->dtype(default_value->dtype());
+ constant->rank(rank);
+ uint32_t dim_size = 1;
+ for (uint32_t i = 0; i < rank; i++)
+ {
+ constant->dim(i).set(shape[i]);
+ dim_size *= shape[i];
+ }
+
+ constant->size<ValueT>(dim_size);
+ const auto value = default_value->scalar<ValueT>();
+ for (uint32_t i = 0; i < dim_size; i++)
+ constant->at<ValueT>(i) = value;
+
+ constant->shape_status(luci::ShapeStatus::VALID);
+ constant->name(name + "_D");
+
+ loco::replace(stod).with(constant);
+
+ return true;
+}
+
+bool fold_sparse_to_dense(luci::CircleSparseToDense *stod)
+{
+ auto indices = loco::must_cast<luci::CircleNode *>(stod->indices());
+ auto default_value = dynamic_cast<luci::CircleConst *>(stod->default_value());
+ if (not default_value)
+ return false;
+
+ auto output_shape = dynamic_cast<luci::CircleConst *>(stod->output_shape());
+ if (not output_shape)
+ return false;
+
+ // Illegal input check
+ if (indices->dtype() != output_shape->dtype())
+ throw std::runtime_error("indices and output_shape of SparseToDense must have the same dtype");
+
+ // TODO: Support more data types
+ if (indices->dtype() == loco::DataType::S64)
+ {
+ if (default_value->dtype() == loco::DataType::S64)
+ {
+ return fold_sparse_to_dense<loco::DataType::S64, loco::DataType::S64>(stod);
+ }
+ }
+ return false;
+}
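+// Supporting another dtype combination is one more dispatch branch, e.g.
+// (sketch only, not enabled by this pass yet):
+//   if (indices->dtype() == loco::DataType::S32 &&
+//       default_value->dtype() == loco::DataType::S32)
+//     return fold_sparse_to_dense<loco::DataType::S32, loco::DataType::S32>(stod);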
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for SparseToDense Op
+ **/
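+// A typical driver runs the pass until it reaches a fixed point, e.g. (sketch):
+//   luci::FoldSparseToDensePass pass;
+//   while (pass.run(graph))
+//     ; // repeat while at least one SparseToDense node was folded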
+bool FoldSparseToDensePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto stod = dynamic_cast<luci::CircleSparseToDense *>(node))
+ {
+ if (fold_sparse_to_dense(stod))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldSparseToDensePass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Graph that has a SparseToDense Op with zero-sized indices
+ *
+ * BEFORE
+ * - shape of indices: [0,1]
+ * - output_shape: [3]
+ * - default_value: scalar 2
+ *
+ * [indices] [output_shape] [values] [default_value]
+ * | | | |
+ * +------[SparseToDense]------+
+ *
+ * AFTER
+ *
+ * [Const] (shape: [3], values: [2, 2, 2])
+ *
+ */
+class S64SparseToDenseZeroIndicesTest : public luci::ConstantFoldingAddTestGraph,
+ public ::testing::Test
+{
+public:
+ S64SparseToDenseZeroIndicesTest() : luci::ConstantFoldingAddTestGraph({3}, loco::DataType::S64) {}
+
+ virtual void SetUp() { init(); }
+
+ loco::Node *createFoldedPattern() override
+ {
+ _stod = _g.nodes()->create<luci::CircleSparseToDense>();
+ _indices = _g.nodes()->create<luci::CircleConst>();
+ _output_shape = _g.nodes()->create<luci::CircleConst>();
+ _values = _g.nodes()->create<luci::CircleConst>();
+ _default_value = _g.nodes()->create<luci::CircleConst>();
+
+ _stod->dtype(loco::DataType::S64);
+ _indices->dtype(loco::DataType::S64);
+ _output_shape->dtype(loco::DataType::S64);
+ _values->dtype(loco::DataType::S64);
+ _default_value->dtype(loco::DataType::S64);
+
+ _indices->shape({0, 1});
+ _output_shape->shape({1});
+ _values->shape({0});
+ _default_value->rank(0);
+
+ _indices->size<loco::DataType::S64>(0);
+ _output_shape->size<loco::DataType::S64>(1);
+ _output_shape->at<loco::DataType::S64>(0) = 3;
+ _values->size<loco::DataType::S64>(0);
+ _default_value->size<loco::DataType::S64>(1);
+ _default_value->at<loco::DataType::S64>(0) = 2;
+
+ _stod->indices(_indices);
+ _stod->output_shape(_output_shape);
+ _stod->values(_values);
+ _stod->default_value(_default_value);
+
+ _stod->name("stod");
+ _indices->name("indices");
+ _output_shape->name("output_shape");
+ _values->name("values");
+ _default_value->name("default_value");
+
+ return _stod;
+ }
+
+protected:
+ luci::CircleSparseToDense *_stod = nullptr;
+ luci::CircleConst *_indices = nullptr;
+ luci::CircleConst *_output_shape = nullptr;
+ luci::CircleConst *_values = nullptr;
+ luci::CircleConst *_default_value = nullptr;
+};
+
+} // namespace
+
+TEST(FoldSparseToDensePassTest, name)
+{
+ luci::FoldSparseToDensePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(S64SparseToDenseZeroIndicesTest, fold_stod_with_zero_indices)
+{
+ luci::FoldSparseToDensePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, and values of the folded const
+ EXPECT_EQ(loco::DataType::S64, folded_const->dtype());
+ EXPECT_EQ(1, folded_const->rank());
+ EXPECT_EQ(3, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(0));
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(1));
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(2));
+}
+
+TEST_F(S64SparseToDenseZeroIndicesTest, illegal_input_NEG)
+{
+ _indices->dtype(loco::DataType::S32);
+
+ luci::FoldSparseToDensePass pass;
+ EXPECT_ANY_THROW(pass.run(graph()));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/CircleShapeInference.h>
+#include <luci/Service/Nodes/CircleConst.h>
+
+namespace
+{
+
+luci::CircleReshape *as_reshape(loco::Node *node)
+{
+ return dynamic_cast<luci::CircleReshape *>(node);
+}
+
+luci::CircleConst *clone_shape(luci::CircleReshape *reshape)
+{
+ const auto shape = dynamic_cast<luci::CircleConst *>(reshape->shape());
+ // only support CircleConst for now
+ if (shape == nullptr)
+ return nullptr;
+
+ // NOTE tflite and circle only support S32
+ // TODO just check with assert() after import handles this
+ auto dtype = shape->dtype();
+ if (dtype != loco::DataType::S32)
+ return nullptr;
+
+ return luci::clone(shape);
+}
+
+void copy_shape(luci::CircleReshape *reshape, luci::CircleReshape *new_reshape)
+{
+ auto ns_rank = reshape->newShape()->rank();
+ new_reshape->newShape()->rank(ns_rank);
+ for (uint32_t r = 0; r < ns_rank; ++r)
+ new_reshape->newShape()->dim(r) = reshape->newShape()->dim(r);
+}
+
+bool forward_reshape(luci::CircleReshape *reshape, luci::CircleNeg *neg)
+{
+ assert(reshape != nullptr);
+ assert(neg != nullptr);
+
+ luci::CircleConst *cloned_shape = clone_shape(reshape);
+ if (cloned_shape == nullptr)
+ return false;
+
+ auto name = reshape->name();
+ assert(name.length() > 0);
+ loco::Graph *graph = neg->graph();
+ // create reshape placed after neg
+ luci::CircleReshape *new_reshape = graph->nodes()->create<luci::CircleReshape>();
+ copy_shape(reshape, new_reshape);
+ new_reshape->shape(cloned_shape);
+ new_reshape->name(name + "_C");
+ luci::add_origin(new_reshape, luci::get_origin(reshape));
+
+ // reconnect network
+ loco::replace(neg).with(new_reshape);
+ neg->x(reshape->tensor());
+ new_reshape->tensor(neg);
+
+ // Do shape inference for this node again.
+ neg->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ return true;
+}
+
+class ForwardReshape final : public luci::CircleNodeMutableVisitor<bool>
+{
+protected:
+ bool visit(luci::CircleNode *node)
+ {
+ LOGGER(l);
+ INFO(l) << "ForwardReshape: Unsupported operator: " << node->name() << std::endl;
+ return false;
+ }
+
+ bool visit(luci::CircleNeg *node)
+ {
+ auto reshape = as_reshape(node->x());
+ if (reshape == nullptr)
+ return false;
+ return forward_reshape(reshape, node);
+ }
+
+ // TODO add more unary operators
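+ // For example, supporting CircleAbs would add the following overload
+ // (sketch only, and it needs a matching forward_reshape() overload):
+ //   bool visit(luci::CircleAbs *node)
+ //   {
+ //     auto reshape = as_reshape(node->x());
+ //     if (reshape == nullptr)
+ //       return false;
+ //     return forward_reshape(reshape, node);
+ //   }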
+};
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * | /
+ * [CircleReshape]
+ * / |
+ * [CircleNode] [(UnaryOp)]
+ * | | \
+ * | | [CircleNode]
+ * | | |
+ *
+ * UnaryOp: CircleNeg, ...
+ *
+ * AFTER
+ * |
+ * [CircleConst] [CircleNode]
+ * | / |
+ * [CircleReshape] [(UnaryOp)] [CircleConst]
+ * | | /
+ * [CircleNode] [CircleReshape]
+ * | | \
+ * | | [CircleNode]
+ * | | |
+ *
+ * Note: a new [CircleReshape] is added after [(UnaryOp)]
+ */
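+// Example (this mirrors the unit test): Input -> Reshape(to [2, 4]) -> Neg
+// becomes Input -> Neg -> Reshape(to [2, 4]), i.e. the Reshape is forwarded
+// past the unary op so the op keeps working on the original layout.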
+bool ForwardReshapeToUnaryOpPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ ForwardReshape forward;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (circle_node->accept(&forward))
+ changed = true;
+ }
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+namespace
+{
+
+using namespace luci::test;
+
+class ReshapeNegGraphlet
+{
+public:
+ ReshapeNegGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ std::vector<uint32_t> shape_out_v = shape_out;
+
+ _reshape_shape = g->nodes()->create<luci::CircleConst>();
+ _reshape = g->nodes()->create<luci::CircleReshape>();
+ _neg = g->nodes()->create<luci::CircleNeg>();
+
+ _reshape_shape->dtype(loco::DataType::S32);
+ _reshape_shape->rank(1);
+ _reshape_shape->dim(0).set(shape_out_v.size());
+ _reshape_shape->shape_status(luci::ShapeStatus::VALID);
+ // values
+ const auto size = shape_out_v.size();
+ _reshape_shape->size<loco::DataType::S32>(size);
+ for (uint32_t i = 0; i < size; i++)
+ _reshape_shape->at<loco::DataType::S32>(i) = shape_out_v[i];
+
+ _reshape_shape->name("reshape_shape");
+ _reshape->name("reshape");
+ _neg->name("neg");
+ }
+
+protected:
+ luci::CircleReshape *_reshape = nullptr;
+ luci::CircleNeg *_neg = nullptr;
+ luci::CircleConst *_reshape_shape = nullptr;
+};
+
+class ForwardReshapeToNegGraph : public TestIOGraph, public ReshapeNegGraphlet
+{
+public:
+ ForwardReshapeToNegGraph() = default;
+
+public:
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ ReshapeNegGraphlet::init(g(), shape_in, shape_out);
+
+ // connect network
+ _reshape->tensor(input());
+ _reshape->shape(_reshape_shape);
+ _neg->x(_reshape);
+
+ output()->from(_neg);
+ }
+};
+
+class ForwardReshapeToNegGraphTest : public ::testing::Test
+{
+public:
+ ForwardReshapeToNegGraphTest() = default;
+
+ void run_pass(void)
+ {
+ while (_pass.run(_graph.g()))
+ ;
+ }
+
+protected:
+ ForwardReshapeToNegGraph _graph;
+ luci::ForwardReshapeToUnaryOpPass _pass;
+};
+
+} // namespace
+
+TEST(ForwardReshapeToUnaryOpPassTest, name)
+{
+ luci::ForwardReshapeToUnaryOpPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(ForwardReshapeToNegGraphTest, simple_forward)
+{
+ _graph.init({2, 2, 2}, {2, 4});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto neg = dynamic_cast<luci::CircleNeg *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, neg);
+ neg = dynamic_cast<luci::CircleNeg *>(reshape->tensor());
+ ASSERT_NE(nullptr, neg);
+}
#include "luci/Pass/FuseActivationFunctionPass.h"
#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeMixins.h>
#include <luci/IR/CircleOpcode.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace luci
{
return false;
auto node_with_fused_act =
- dynamic_cast<luci::LuciNodeMixin<luci::LuciNodeTrait::FusedActFunc> *>(pred_node);
+ dynamic_cast<luci::CircleNodeMixin<luci::CircleNodeTrait::FusedActFunc> *>(pred_node);
if (node_with_fused_act == nullptr)
return false;
+ // TODO remove this work-around
+ // This skips fusion for Concatenation, as luci-interpreter doesn't support it yet
+ if (dynamic_cast<luci::CircleConcatenation *>(pred_node) != nullptr)
+ return false;
+
auto fused_act = node_with_fused_act->fusedActivationFunction();
luci::FusedActFunc target_func = luci::FusedActFunc::UNDEFINED;
return false;
node_with_fused_act->fusedActivationFunction(target_func);
+ luci::add_origin(pred_node, luci::get_origin(node));
loco::replace(node).with(pred_node);
node->drop();
* limitations under the License.
*/
-#include "FuseActivationFunctionPassInternal.h"
+#include "luci/Pass/FuseActivationFunctionPass.h"
#include <luci/IR/CircleNodes.h>
+#include <luci/test/TestIOGraph.h>
+
#include <gtest/gtest.h>
namespace
{
+using namespace luci::test;
+
/**
* Simple graph for test
*
* [Conv2]
*
*/
-class SimpleGraph
+class ConvReluConvGraphlet
+{
+public:
+ ConvReluConvGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _conv1 = g->nodes()->create<luci::CircleConv2D>();
+ _conv2 = g->nodes()->create<luci::CircleConv2D>();
+ _relu = g->nodes()->create<luci::CircleRelu>();
+ _conv1_f = g->nodes()->create<luci::CircleConst>();
+ _conv1_b = g->nodes()->create<luci::CircleConst>();
+ _conv2_f = g->nodes()->create<luci::CircleConst>();
+ _conv2_b = g->nodes()->create<luci::CircleConst>();
+
+ _conv1->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ _conv1->name("conv1");
+ _conv2->name("conv2");
+ _relu->name("relu");
+ _conv1_f->name("conv1f");
+ _conv1_b->name("conv1b");
+ _conv2_f->name("conv2f");
+ _conv2_b->name("conv2b");
+ }
+
+public:
+ luci::CircleRelu *relu() { return _relu; }
+ luci::CircleConv2D *conv1() { return _conv1; }
+ luci::CircleConv2D *conv2() { return _conv2; }
+
+protected:
+ luci::CircleConv2D *_conv1 = nullptr;
+ luci::CircleConv2D *_conv2 = nullptr;
+ luci::CircleRelu *_relu = nullptr;
+ luci::CircleConst *_conv1_f = nullptr;
+ luci::CircleConst *_conv1_b = nullptr;
+ luci::CircleConst *_conv2_f = nullptr;
+ luci::CircleConst *_conv2_b = nullptr;
+};
+
+class FuseActTestGraph : public TestIOGraph, public ConvReluConvGraphlet
{
public:
- SimpleGraph()
+ FuseActTestGraph() = default;
+
+ void init(void)
{
- conv1 = g.nodes()->create<luci::CircleConv2D>();
- conv2 = g.nodes()->create<luci::CircleConv2D>();
- relu = g.nodes()->create<luci::CircleRelu>();
+ TestIOGraph::init({1}, {1});
+ ConvReluConvGraphlet::init(g());
- conv1->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _conv1->input(input());
+ _conv1->filter(_conv1_f);
+ _conv1->bias(_conv1_b);
- relu->features(conv1);
- conv2->input(relu);
+ _relu->features(_conv1);
+
+ _conv2->input(_relu);
+ _conv2->filter(_conv2_f);
+ _conv2->bias(_conv2_b);
+
+ output()->from(_conv2);
}
+};
+class ConvHasMultiSuccGraph : public TestIOGraph, public ConvReluConvGraphlet
+{
public:
- loco::Graph g;
- luci::CircleConv2D *conv1;
- luci::CircleConv2D *conv2;
- luci::CircleRelu *relu;
+ ConvHasMultiSuccGraph() = default;
+
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ ConvReluConvGraphlet::init(g());
+
+ _conv1->input(input());
+ _conv1->filter(_conv1_f);
+ _conv1->bias(_conv1_b);
+
+ _relu->features(_conv1);
+
+ _conv2->input(_conv1);
+ _conv2->filter(_conv2_f);
+ _conv2->bias(_conv2_b);
+
+ output()->from(_relu); // We need to check from relu
+ }
};
+// TODO use ::testing::Test
+
} // namespace
+TEST(FuseActivationFunctionPassTest, name)
+{
+ luci::FuseActivationFunctionPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
TEST(FusePreActivationBatchNorm, fuse_activation_function)
{
- SimpleGraph g;
+ FuseActTestGraph g;
+ luci::FuseActivationFunctionPass pass;
- EXPECT_TRUE(luci::fuse_activation_function(g.relu));
+ g.init();
- EXPECT_EQ(g.conv1, g.conv2->input());
+ EXPECT_TRUE(pass.run(g.g()));
+ EXPECT_EQ(g.conv1(), g.conv2()->input());
}
TEST(FusePreActivationBatchNorm, fuse_activation_function_dup_relu)
{
- SimpleGraph g;
- g.conv1->fusedActivationFunction(luci::FusedActFunc::RELU);
+ FuseActTestGraph g;
+ luci::FuseActivationFunctionPass pass;
- EXPECT_TRUE(luci::fuse_activation_function(g.relu));
+ g.init();
+ g.conv1()->fusedActivationFunction(luci::FusedActFunc::RELU);
- EXPECT_EQ(g.conv1, g.conv2->input());
+ EXPECT_TRUE(pass.run(g.g()));
+ EXPECT_EQ(g.conv1(), g.conv2()->input());
}
-TEST(FusePreActivationBatchNorm, fuse_activation_function_NEG)
+TEST(FusePreActivationBatchNorm, fuse_activation_function_mulsucc_NEG)
{
- SimpleGraph g;
- g.conv2->input(g.conv1);
+ ConvHasMultiSuccGraph g;
+ luci::FuseActivationFunctionPass pass;
+
+ g.init();
- // Conv1 has multiple successors
- EXPECT_FALSE(luci::fuse_activation_function(g.relu));
+ // The Conv2D feeding Relu has multiple successors
+ EXPECT_FALSE(pass.run(g.g()));
+}
+
+TEST(FusePreActivationBatchNorm, fuse_activation_function_tanh_NEG)
+{
+ FuseActTestGraph g;
+ luci::FuseActivationFunctionPass pass;
- g.conv2->input(g.relu);
- g.conv1->fusedActivationFunction(luci::FusedActFunc::TANH);
+ g.init();
+ g.conv1()->fusedActivationFunction(luci::FusedActFunc::TANH);
- // Conv1 already has activation function
- EXPECT_FALSE(luci::fuse_activation_function(g.relu));
+ // The Conv2D feeding Relu already has an activation function
+ EXPECT_FALSE(pass.run(g.g()));
}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_CIRCLE_FUSE_ACTIVATION_FUNCTION_PASS_INTERNAL_H__
-#define __LUCI_CIRCLE_FUSE_ACTIVATION_FUNCTION_PASS_INTERNAL_H__
-
-#include <luci/IR/CircleNodes.h>
-
-namespace luci
-{
-
-// Fuse activation function with preceding Op
-/// @return true if success
-bool fuse_activation_function(luci::CircleNode *node);
-
-} // namespace luci
-
-#endif // __LUCI_CIRCLE_FUSE_ACTIVATION_FUNCTION_PASS_INTERNAL_H__
#include "luci/Pass/FuseAddWithTConvPass.h"
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace
{
/**
- * Fuse add to TCONV if possible
+ * Fuse Add to TransposeConv if possible
*
* BEFORE
- *
- * [CircleTransposeConv]
+ * |
+ * [CircleConst] [CircleTransposeConv]
+ * \ |
+ * [CircleAdd]
* |
- * [add]
+ *
* AFTER
+ * |
+ * [CircleConst] |
+ * \ |
+ * [CircleTransposeConv] [CircleAdd]
+ * |
+ * ([CircleRelu6])
+ * |
*
- * [CircleTransposeConv]
+ * Note: CircleRelu6 is inserted if Add activation is ReLU6
*/
bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv)
{
if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
{
+ auto name = addition->name();
+ assert(name.length() > 0);
// separate relu op from add op
auto relu = add->graph()->nodes()->create<luci::CircleRelu6>();
relu->features(tconv);
+ relu->name(name + "/Relu6");
+ luci::add_origin(relu, luci::get_origin(add));
// remove add node
replace(add).with(relu);
replace(add).with(tconv);
}
+ // set origin
+ luci::add_origin(tconv, luci::get_origin(add));
+
return true;
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseAddWithTConvPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseAddWithTConvPassTest, name)
+{
+ luci::FuseAddWithTConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
#include "luci/Pass/FuseBCQPass.h"
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Log.h>
#include <cassert>
{
public:
BCQFuser<1>(int32_t original_output_cnt, int32_t bundle_cnt)
- : _original_output_cnt{original_output_cnt}, _bundle_cnt{bundle_cnt}
+ : _original_output_cnt{original_output_cnt}, _bundle_cnt{bundle_cnt}
{
// Do nothing
}
{
const auto prefix = (output_node->index() - (_original_output_cnt + 1)) / (_bundle_cnt);
const MetadataType metadata_type = static_cast<MetadataType>(
- (output_node->index() - (_original_output_cnt + 1)) % (_bundle_cnt));
+ (output_node->index() - (_original_output_cnt + 1)) % (_bundle_cnt));
const auto circle_node = loco::must_cast<luci::CircleNode *>(output_node->from());
add_BCQ_info_node(prefix, metadata_type, circle_node);
}
if (prefix == -1 || !is_valid_prefix(prefix))
continue;
+ auto name = gather->name();
+ assert(name.length() > 0);
+
auto bcq_gather = g->nodes()->create<luci::CircleBCQGather>();
+ luci::add_origin(bcq_gather, luci::get_origin(gather));
bcq_gather->op_version(1);
bcq_gather->input_scales(alpha(g, prefix));
bcq_gather->input_binary(packed_binary_code(g, prefix));
bcq_gather->indices(gather->indices());
bcq_gather->input_clusters(packed_clusters(g, prefix));
+ bcq_gather->name(name + "/BCQGather");
if (_do_w_x[prefix]->at<loco::DataType::BOOL>(0))
{
bcq_gather->axis(axis_transpose);
const auto indices_rank =
- loco::must_cast<luci::CircleNode *>(gather->indices())->rank();
+ loco::must_cast<luci::CircleNode *>(gather->indices())->rank();
auto perm = g->nodes()->create<luci::CircleConst>();
perm->dtype(loco::DataType::S32);
perm->at<loco::DataType::S32>(idx) = idx + 1;
perm->at<loco::DataType::S32>(indices_rank) = 0;
perm->shape_status(luci::ShapeStatus::VALID);
+ perm->name(name + "/Transpose/perm");
auto output_transpose = g->nodes()->create<luci::CircleTranspose>();
+ luci::add_origin(output_transpose, luci::get_origin(gather));
output_transpose->a(bcq_gather);
output_transpose->perm(perm);
+ output_transpose->name(name + "/Transpose");
loco::replace(gather).with(output_transpose);
}
if (prefix == -1 || !is_valid_prefix(prefix))
continue;
+ auto name = fully_connected->name();
+ assert(name.length() > 0);
+
auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+ luci::add_origin(bcq_fc, luci::get_origin(fully_connected));
bcq_fc->op_version(1);
bcq_fc->weights_scales(alpha(g, prefix));
bcq_fc->bias(fully_connected->bias());
bcq_fc->weights_clusters(packed_clusters(g, prefix));
bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction());
+ bcq_fc->name(name + "/BCQFullyConnected");
loco::Node *bcq_input = fully_connected->input();
new_shape->rank(1);
new_shape->dim(0) = 2;
- auto batch_size = 1;
- for (uint32_t i = 0; i < original_input->rank() - 1; ++i)
- batch_size *= original_input->dim(i).value();
-
- new_shape->at<loco::DataType::S32>(0) = batch_size;
- new_shape->at<loco::DataType::S32>(1) =
- original_input->dim(original_input->rank() - 1).value();
+ new_shape->at<loco::DataType::S32>(0) = -1;
+ new_shape->at<loco::DataType::S32>(1) = weights->dim(1).value();
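+ // -1 lets Reshape infer the flattened batch dimension from the input size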
new_shape->shape_status(luci::ShapeStatus::VALID);
+ new_shape->name(name + "/Reshape/shape");
auto reshape = g->nodes()->create<luci::CircleReshape>();
+ luci::add_origin(reshape, luci::get_origin(fully_connected));
reshape->tensor(original_input);
reshape->shape(new_shape);
+ reshape->name(name + "/Reshape");
bcq_input = reshape;
}
perm->at<loco::DataType::S32>(0) = 1;
perm->at<loco::DataType::S32>(1) = 0;
perm->shape_status(luci::ShapeStatus::VALID);
+ perm->name(name + "/Transpose/perm");
auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
+ luci::add_origin(input_transpose, luci::get_origin(fully_connected));
input_transpose->a(bcq_input);
input_transpose->perm(perm);
+ input_transpose->name(name + "_input/Transpose");
bcq_fc->input(input_transpose);
auto output_transpose = g->nodes()->create<luci::CircleTranspose>();
+ luci::add_origin(output_transpose, luci::get_origin(fully_connected));
output_transpose->a(bcq_fc);
output_transpose->perm(perm);
+ output_transpose->name(name + "_output/Transpose");
loco::replace(fully_connected).with(output_transpose);
return true;
}
else if (auto weights_as_input =
- dynamic_cast<luci::CircleConst *>(fully_connected->input()))
+ dynamic_cast<luci::CircleConst *>(fully_connected->input()))
{
auto prefix = get_prefix_of_const(weights_as_input);
if (prefix == -1 || !is_valid_prefix(prefix))
assert(_do_w_x[prefix]->at<loco::DataType::BOOL>(0) == true);
+ auto name = weights_as_input->name();
+ assert(name.length() > 0);
+
auto perm = g->nodes()->create<luci::CircleConst>();
perm->dtype(loco::DataType::S32);
perm->size<loco::DataType::S32>(2);
perm->at<loco::DataType::S32>(0) = 1;
perm->at<loco::DataType::S32>(1) = 0;
perm->shape_status(luci::ShapeStatus::VALID);
+ perm->name(name + "/Transpose/perm");
auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
+ luci::add_origin(input_transpose, luci::get_origin(fully_connected));
input_transpose->a(fully_connected->weights());
input_transpose->perm(perm);
+ input_transpose->name(name + "/Transpose");
auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+ luci::add_origin(bcq_fc, luci::get_origin(fully_connected));
assert(dynamic_cast<luci::CircleOutputExclude *>(fully_connected->bias()) != nullptr);
bcq_fc->weights_hidden_size(weights_as_input->dim(1).value());
bcq_fc->input(input_transpose);
+ bcq_fc->name(name + "/BCQFullyConnected");
+
loco::replace(fully_connected).with(bcq_fc);
return true;
new_beta->dim(1) = _packed_binary_code[prefix]->dim(1);
for (uint32_t i = 0; i < _packed_binary_code[prefix]->size<loco::DataType::S32>(); ++i)
new_beta->at<loco::DataType::S32>(i) =
- _packed_binary_code[prefix]->at<loco::DataType::S32>(i);
+ _packed_binary_code[prefix]->at<loco::DataType::S32>(i);
new_beta->shape_status(luci::ShapeStatus::VALID);
return new_beta;
for (int i = 0; i < number_of_clusters; ++i)
{
packed_clusters->at<loco::DataType::S32>(i * 2) =
- qbits_of_clusters->at<loco::DataType::S32>(i);
+ qbits_of_clusters->at<loco::DataType::S32>(i);
packed_clusters->at<loco::DataType::S32>(i * 2 + 1) =
- size_of_clusters->at<loco::DataType::S32>(i);
+ size_of_clusters->at<loco::DataType::S32>(i);
}
return packed_clusters;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBCQPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseBCQPassTest, name)
+{
+ luci::FuseBCQPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithConvPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+/**
+ * Fuse Mul-Add to Conv2D if possible.
+ *
+ * NOTE TF's BatchNormalization is converted to Mul and Add.
+ *
+ * BEFORE
+ * | [CircleConst]
+ * | / [CircleConst]
+ * | / /
+ * [CircleConv2D] [CircleConst]
+ * | /
+ * [CircleMul] [CircleConst]
+ * | /
+ * [CircleAdd]
+ * |
+ *
+ * AFTER
+ * | [CircleConst]
+ * +--------------+ / [CircleConst]
+ * | | / /
+ * | [CircleConv2D] [CircleConst]
+ * [CircleConst] | | /
+ * [CircleConst] \ | [CircleMul] [CircleConst]
+ * \ \ | | /
+ * [CircleConv2D] [CircleAdd]
+ * |
+ */
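+// In short, the BatchNorm constants are folded into the convolution weights:
+//   fused_filter[o, h, w, i] = filter[o, h, w, i] * scale[o]
+//   fused_bias[o]            = bias[o] * scale[o] + shift[o]
+// where scale is the Mul constant and shift is the Add constant (see the loops below).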
+bool fused_batch_norm_with_conv(luci::CircleAdd *add)
+{
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *shift = nullptr;
+ if (auto add_lhs = dynamic_cast<luci::CircleMul *>(add->x()))
+ {
+ mul = add_lhs;
+ shift = dynamic_cast<luci::CircleConst *>(add->y());
+ }
+ else if (auto add_rhs = dynamic_cast<luci::CircleMul *>(add->y()))
+ {
+ mul = add_rhs;
+ shift = dynamic_cast<luci::CircleConst *>(add->x());
+ }
+
+ // If CircleMul is not found or constant operand of CircleAdd is not found,
+ // this pass cannot be applied.
+ if (mul == nullptr || shift == nullptr)
+ return false;
+
+ // If FusedActivationFunction of mul is not none, this pass cannot be applied.
+ if (mul->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+
+ // To apply this pass, shape of shift should be [1, 1, 1, out_channel].
+ if (shift->rank() != 4)
+ return false;
+ for (uint32_t i = 0; i < 3; ++i)
+ if (shift->dim(i).value() != 1)
+ return false;
+
+ luci::CircleConv2D *conv = nullptr;
+ luci::CircleConst *scale = nullptr;
+ if (auto mul_lhs = dynamic_cast<luci::CircleConv2D *>(mul->x()))
+ {
+ conv = mul_lhs;
+ scale = dynamic_cast<luci::CircleConst *>(mul->y());
+ }
+ else if (auto mul_rhs = dynamic_cast<luci::CircleConv2D *>(mul->y()))
+ {
+ conv = mul_rhs;
+ scale = dynamic_cast<luci::CircleConst *>(mul->x());
+ }
+
+ // If CircleConv2D is not found or constant operand of CircleMul is not found,
+ // this pass cannot be applied.
+ if (conv == nullptr || scale == nullptr)
+ return false;
+
+ // To apply this pass, shape of scale should be [1, 1, 1, out_channel].
+ if (scale->rank() != 4)
+ return false;
+ for (uint32_t i = 0; i < 3; ++i)
+ if (scale->dim(i).value() != 1)
+ return false;
+
+ // If FusedActivationFunction of conv is not none, this pass cannot be applied.
+ if (conv->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+
+ luci::CircleConst *filter = dynamic_cast<luci::CircleConst *>(conv->filter());
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(conv->bias());
+
+ // If filter or bias of conv is not const, this pass cannot be applied.
+ if (filter == nullptr || bias == nullptr)
+ return false;
+
+ // If dtype of filter is different with scale and shift, multiplication may be impossible.
+ if (filter->dtype() != scale->dtype())
+ return false;
+ if (filter->dtype() != shift->dtype())
+ return false;
+
+ // TODO Support more data type
+ if (filter->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ // Output channel dimension should be same. If not, this pass cannot be applied.
+ if (filter->dim(0).value() != scale->dim(3).value())
+ return false;
+ if (filter->dim(0).value() != shift->dim(3).value())
+ return false;
+
+ auto name = add->name();
+ assert(name.length() > 0);
+
+ luci::CircleConv2D *fused_conv = add->graph()->nodes()->create<luci::CircleConv2D>();
+ luci::CircleConst *fused_filter = add->graph()->nodes()->create<luci::CircleConst>();
+ luci::CircleConst *fused_bias = add->graph()->nodes()->create<luci::CircleConst>();
+
+ uint32_t filter_out_channel = filter->dim(0).value();
+ uint32_t filter_height = filter->dim(1).value();
+ uint32_t filter_width = filter->dim(2).value();
+ uint32_t filter_in_channel = filter->dim(3).value();
+
+ // Copy filter
+ fused_filter->dtype(filter->dtype());
+ fused_filter->size<loco::DataType::FLOAT32>(filter->size<loco::DataType::FLOAT32>());
+ fused_filter->rank(4);
+ fused_filter->dim(0).set(filter_out_channel);
+ fused_filter->dim(1).set(filter_height);
+ fused_filter->dim(2).set(filter_width);
+ fused_filter->dim(3).set(filter_in_channel);
+ fused_filter->shape_status(luci::ShapeStatus::VALID);
+ fused_filter->name(name + "/Conv2D/filter");
+
+ // Fuse scale to new filter
+ for (uint32_t c = 0; c < filter_out_channel; c++)
+ {
+ for (uint32_t h = 0; h < filter_height; h++)
+ {
+ for (uint32_t w = 0; w < filter_width; w++)
+ {
+ for (uint32_t b = 0; b < filter_in_channel; b++)
+ {
+ uint32_t offset = c * filter_height * filter_width * filter_in_channel +
+ h * filter_width * filter_in_channel + w * filter_in_channel + b;
+ fused_filter->at<loco::DataType::FLOAT32>(offset) =
+ filter->at<loco::DataType::FLOAT32>(offset) * scale->at<loco::DataType::FLOAT32>(c);
+ }
+ }
+ }
+ }
+
+ // Copy bias
+ assert(bias->rank() == 1);
+ assert(bias->dim(0).value() == filter_out_channel);
+ fused_bias->dtype(bias->dtype());
+ fused_bias->size<loco::DataType::FLOAT32>(bias->size<loco::DataType::FLOAT32>());
+ fused_bias->rank(1);
+ fused_bias->dim(0).set(filter_out_channel);
+ fused_bias->shape_status(luci::ShapeStatus::VALID);
+ fused_bias->name(name + "/Conv2D/bias");
+
+ // Fuse scale and shift to bias
+ for (uint32_t b = 0; b < filter_out_channel; ++b)
+ {
+ fused_bias->at<loco::DataType::FLOAT32>(b) =
+ bias->at<loco::DataType::FLOAT32>(b) * scale->at<loco::DataType::FLOAT32>(b) +
+ shift->at<loco::DataType::FLOAT32>(b);
+ }
+
+ // Set attributes of fused_conv
+ fused_conv->input(conv->input());
+ fused_conv->filter(fused_filter);
+ fused_conv->bias(fused_bias);
+ fused_conv->fusedActivationFunction(add->fusedActivationFunction());
+ fused_conv->padding(conv->padding());
+ fused_conv->stride()->h(conv->stride()->h());
+ fused_conv->stride()->w(conv->stride()->w());
+ fused_conv->dilation()->h(conv->dilation()->h());
+ fused_conv->dilation()->w(conv->dilation()->w());
+ fused_conv->name(name + "/Conv2D");
+ luci::add_origin(fused_conv, luci::composite_origin({luci::get_origin(add), luci::get_origin(mul),
+ luci::get_origin(conv)}));
+
+ replace(add).with(fused_conv);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseBatchNormWithConvPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto add = dynamic_cast<luci::CircleAdd *>(node))
+ {
+ if (fused_batch_norm_with_conv(add))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithConvPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseBatchNormWithConvPassTest, name)
+{
+ luci::FuseBatchNormWithConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithDwConvPass.h"
+
+#include "helpers/NodeFiller.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+/**
+ * Fuse Mul-Add to DepthwiseConv2D if possible.
+ *
+ * NOTE TF's BatchNormalization is converted to Mul and Add.
+ *
+ * BEFORE
+ * | [CircleConst]
+ * | / [CircleConst]
+ * | / /
+ * [CircleDepthwiseConv2D] [CircleConst]
+ * | /
+ * [CircleMul] [CircleConst]
+ * | /
+ * [CircleAdd]
+ * |
+ *
+ * AFTER
+ * | [CircleConst]
+ * +-------------------------------------+ / [CircleConst]
+ * | | / /
+ * | [CircleDepthwiseConv2D] [CircleConst]
+ * | [CircleConst] | /
+ * | / [CircleConst] [CircleMul] [CircleConst]
+ * | / / | /
+ * [CircleDepthwiseConv2D] [CircleAdd]
+ * |
+ *
+ */
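+// In short, the BatchNorm constants are folded into the depthwise convolution:
+//   fused_filter[0, h, w, c] = filter[0, h, w, c] * scale[c / multiplier]
+//   fused_bias[c]            = bias[c] * scale[c] + shift[c]
+// where scale is the Mul constant and shift is the Add constant (see the loops below).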
+
+/**
+ * @brief Check shape is [x] or [1, 1, 1, x]
+ */
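+// e.g. shapes [32] and [1, 1, 1, 32] are accepted while [1, 32, 1, 1] is rejected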
+bool is_scale_shift_shape(luci::CircleConst *node)
+{
+ auto rank = node->rank();
+ if (rank != 1 && rank != 4)
+ return false;
+ for (uint32_t r = 0; r < rank - 1; ++r)
+ {
+ if (node->dim(r).value() != 1)
+ return false;
+ }
+ return true;
+}
+
+bool fused_batch_norm_with_dwconv(luci::CircleAdd *add)
+{
+ assert(add != nullptr);
+
+ // Find the pattern of CircleDepthwiseConv2D - CircleMul - CircleAdd
+ luci::CircleConst *scale = nullptr;
+ luci::CircleConst *shift = nullptr;
+ luci::CircleDepthwiseConv2D *dwconv = nullptr;
+ luci::CircleMul *mul = nullptr;
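+ // luci::fill(&a, &b).with_commutative_args_of(n) binds a/b to n's two operands
+ // in either order and returns false when the operand types do not match.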
+ if (not luci::fill(&shift, &mul).with_commutative_args_of(add))
+ return false;
+ if (not luci::fill(&scale, &dwconv).with_commutative_args_of(mul))
+ return false;
+
+ // check scale and shift constant attributes
+ // scale and shift can be [x] or [1, 1, 1, x]
+ if (not is_scale_shift_shape(scale))
+ return false;
+ if (not is_scale_shift_shape(shift))
+ return false;
+
+ // check mul, add attributes
+ if (mul->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (mul->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+ if (add->dtype() != loco::DataType::FLOAT32)
+ return false;
+ // TODO support more Activations
+ if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
+ return false;
+
+ // get weight of dwconv
+ auto filter = dynamic_cast<luci::CircleConst *>(dwconv->filter());
+ if (not filter)
+ return false;
+ if (filter->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (filter->rank() != 4)
+ return false;
+
+ // check attributes of dwconv
+ if (dwconv->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+ if (dwconv->depthMultiplier() < 0) // can this happen?
+ return false;
+
+ // get bias of dwconv
+ auto bias = dynamic_cast<luci::CircleConst *>(dwconv->bias());
+ if (not bias)
+ return false;
+ if (bias->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (bias->rank() != 1)
+ return false;
+
+ // filter is represented as [1, H, W, C*M] where M is the depth multiplier.
+ auto filter_out_chn = filter->dim(3).value();
+ auto multiplier = static_cast<uint32_t>(dwconv->depthMultiplier());
+ auto srank = scale->rank(); // as rank can be 1 or 4
+ if (filter_out_chn != scale->dim(srank - 1).value() * multiplier)
+ return false;
+ srank = shift->rank();
+ if (filter_out_chn != shift->dim(srank - 1).value() * multiplier)
+ return false;
+ auto channel = filter_out_chn / multiplier;
+
+ auto name = add->name();
+ assert(name.length() > 0);
+
+ loco::Graph *graph = add->graph();
+ luci::CircleDepthwiseConv2D *fused_dwconv = graph->nodes()->create<luci::CircleDepthwiseConv2D>();
+ luci::CircleConst *fused_filter = graph->nodes()->create<luci::CircleConst>();
+ luci::CircleConst *fused_bias = graph->nodes()->create<luci::CircleConst>();
+
+ auto filter_in_chn = filter->dim(0).value();
+ auto filter_height = filter->dim(1).value();
+ auto filter_width = filter->dim(2).value();
+ assert(filter_in_chn == 1);
+
+ // Copy filter shape
+ fused_filter->dtype(filter->dtype());
+ fused_filter->size<loco::DataType::FLOAT32>(filter->size<loco::DataType::FLOAT32>());
+ fused_filter->rank(4);
+ fused_filter->dim(0).set(filter_in_chn);
+ fused_filter->dim(1).set(filter_height);
+ fused_filter->dim(2).set(filter_width);
+ fused_filter->dim(3).set(filter_out_chn);
+ fused_filter->shape_status(luci::ShapeStatus::VALID);
+ fused_filter->name(name + "/DepthwiseConv2D/filter");
+
+ // fused filter weight = filter weight * mul(scale); add(shift) is folded into the bias below
+ for (uint32_t b = 0; b < filter_in_chn; b++)
+ {
+ for (uint32_t h = 0; h < filter_height; h++)
+ {
+ for (uint32_t w = 0; w < filter_width; w++)
+ {
+ for (uint32_t c = 0; c < filter_out_chn; c++)
+ {
+ uint32_t offset = b * filter_height * filter_width * filter_out_chn +
+ h * filter_width * filter_out_chn + w * filter_out_chn + c;
+ uint32_t chn = c / multiplier;
+ fused_filter->at<loco::DataType::FLOAT32>(offset) =
+ filter->at<loco::DataType::FLOAT32>(offset) * scale->at<loco::DataType::FLOAT32>(chn);
+ }
+ }
+ }
+ }
+
+ // Fuse bias with scale and shift
+ fused_bias->dtype(shift->dtype());
+ fused_bias->size<loco::DataType::FLOAT32>(shift->size<loco::DataType::FLOAT32>());
+ fused_bias->rank(1);
+ fused_bias->dim(0).set(channel);
+ fused_bias->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t c = 0; c < channel; ++c)
+ {
+ fused_bias->at<loco::DataType::FLOAT32>(c) =
+ bias->at<loco::DataType::FLOAT32>(c) * scale->at<loco::DataType::FLOAT32>(c) +
+ shift->at<loco::DataType::FLOAT32>(c);
+ }
+ fused_bias->name(name + "/DepthwiseConv2D/bias");
+
+ // set new dwconv properties
+ fused_dwconv->input(dwconv->input());
+ fused_dwconv->filter(fused_filter);
+ fused_dwconv->bias(fused_bias);
+ fused_dwconv->fusedActivationFunction(add->fusedActivationFunction());
+ fused_dwconv->padding(dwconv->padding());
+ fused_dwconv->stride()->h(dwconv->stride()->h());
+ fused_dwconv->stride()->w(dwconv->stride()->w());
+ fused_dwconv->depthMultiplier(dwconv->depthMultiplier());
+ fused_dwconv->dilation()->h(dwconv->dilation()->h());
+ fused_dwconv->dilation()->w(dwconv->dilation()->w());
+ fused_dwconv->name(name + "/DepthwiseConv2D");
+ luci::add_origin(fused_dwconv,
+ luci::composite_origin(
+ {luci::get_origin(add), luci::get_origin(mul), luci::get_origin(dwconv)}));
+
+ replace(add).with(fused_dwconv);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseBatchNormWithDwConvPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto add = dynamic_cast<luci::CircleAdd *>(node))
+ {
+ if (fused_batch_norm_with_dwconv(add))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithDwConvPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseBatchNormWithDwConvPassTest, name)
+{
+ luci::FuseBatchNormWithDwConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/FuseBatchNormWithTConv.h"
-
-#include <luci/IR/CircleNodes.h>
-
-namespace
-{
-/**
- * NOTE TF's fusedBatchNorm is converted to mul and add of Circle.
- *
- * BEFORE
- *
- * [CircleTransposeConv]
- * |
- * [mul]
- * |
- * [add]
- * AFTER
- *
- * [CircleTransposeConv]
- */
-bool fused_batch_norm_with_tconv(luci::CircleTransposeConv *tconv)
-{
- // check whether it has bias or not. This optimization works only if it doesn't.
- auto bias = dynamic_cast<luci::CircleOutputExclude *>(tconv->bias());
- if (not bias)
- return false;
-
- // get weight of tconv
- auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter());
- if (not filter)
- return false;
- if (filter->dtype() != loco::DataType::FLOAT32)
- return false;
-
- // get mul node
- auto tconv_output = loco::succs(tconv);
- assert(tconv_output.size() == 1);
- auto mul = dynamic_cast<luci::CircleMul *>(*tconv_output.begin());
- if (not mul)
- return false;
- if (mul->dtype() != loco::DataType::FLOAT32)
- return false;
-
- // get add node
- auto mul_output = loco::succs(mul);
- assert(mul_output.size() == 1);
- auto add = dynamic_cast<luci::CircleAdd *>(*mul_output.begin());
- if (not add)
- return false;
- if (add->dtype() != loco::DataType::FLOAT32)
- return false;
- if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
- add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
- return false;
-
- // get scale of batchnorm
- auto scale = dynamic_cast<luci::CircleConst *>(mul->y());
- if (not scale)
- return false;
-
- // scale dim(0) == tconv filter channel dim
- if (filter->rank() != 4)
- return false;
- auto filter_out_dim = filter->dim(0).value();
- if (scale->rank() != 1)
- return false;
- auto scale_dim = scale->dim(0).value();
- if (filter_out_dim != scale_dim)
- return false;
-
- // get shift of batchnorm
- auto shift = dynamic_cast<luci::CircleConst *>(add->y());
- if (not shift)
- return false;
-
- // shift dim(0) == tconv filter channel dim
- if (shift->rank() != 1)
- return false;
- auto shift_dim = shift->dim(0).value();
- if (filter_out_dim != shift_dim)
- return false;
-
- // filter weight = filter weight * mul(scale) + add(shift)
- uint32_t filter_height_dim = filter->dim(1).value();
- uint32_t filter_width_dim = filter->dim(2).value();
- uint32_t filter_in_dim = filter->dim(3).value();
- for (uint32_t c = 0; c < filter_out_dim; c++)
- {
- for (uint32_t h = 0; h < filter_height_dim; h++)
- {
- for (uint32_t w = 0; w < filter_width_dim; w++)
- {
- for (uint32_t b = 0; b < filter_in_dim; b++)
- {
- uint32_t offset = c * filter_height_dim * filter_width_dim * filter_in_dim +
- h * filter_width_dim * filter_in_dim + w * filter_in_dim + b;
- filter->at<loco::DataType::FLOAT32>(offset) *= scale->at<loco::DataType::FLOAT32>(c);
- }
- }
- }
- }
-
- // fuse shift with transposed conv
- tconv->bias(shift);
-
- if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
- {
- // separate relu op from add op
- auto relu = add->graph()->nodes()->create<luci::CircleRelu6>();
- relu->features(tconv);
-
- // remove mul node
- replace(add).with(relu);
- }
- else
- {
- replace(add).with(tconv);
- }
-
- return true;
-}
-
-} // namespace
-
-namespace luci
-{
-
-bool FuseBatchNormWithTConvPass::run(loco::Graph *g)
-{
- bool changed = false;
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
- {
- auto tconv = dynamic_cast<luci::CircleTransposeConv *>(node);
- if (not tconv)
- continue;
-
- changed |= fused_batch_norm_with_tconv(tconv);
- }
-
- return changed;
-}
-
-} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithTConvPass.h"
+
+#include "helpers/NodeFiller.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+/**
+ * Fuse Mul-Add to TransposeConv if possible.
+ *
+ * NOTE TF's BatchNormalization is converted to Mul and Add.
+ *
+ * BEFORE
+ * | [CircleOutputExclude]
+ * | / [CircleConst]
+ * | / /
+ * [CircleTransposeConv] [CircleConst]
+ * | /
+ * [CircleMul] [CircleConst]
+ * | /
+ * [CircleAdd]
+ * |
+ *
+ * AFTER
+ * | [CircleOutputExclude]
+ * +-------------------------------------+ / [CircleConst]
+ * | | / /
+ * | [CircleTransposeConv] [CircleConst]
+ * | [CircleConst] | /
+ * | / [CircleConst] [CircleMul] [CircleConst]
+ * | / / | /
+ * [CircleTransposeConv] [CircleAdd]
+ * |
+ * ([CircleRelu6])
+ * |
+ *
+ * Note: CircleRelu6 is inserted if Add activation is ReLU6
+ */
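+// In short, the BatchNorm constants are folded into the TransposeConv:
+//   fused_filter[o, h, w, i] = filter[o, h, w, i] * scale[o]
+//   fused_bias[o]            = shift[o]   (the original tconv has no bias)
+// where scale is the Mul constant and shift is the Add constant (see the loops below).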
+bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
+{
+ assert(add != nullptr);
+
+ // Find the pattern of CircleTransposeConv - CircleMul - CircleAdd
+ luci::CircleConst *scale = nullptr;
+ luci::CircleConst *shift = nullptr;
+ luci::CircleTransposeConv *tconv = nullptr;
+ luci::CircleMul *mul = nullptr;
+ if (not luci::fill(&shift, &mul).with_commutative_args_of(add))
+ return false;
+ if (not luci::fill(&scale, &tconv).with_commutative_args_of(mul))
+ return false;
+
+ // check scale and shift constant attributes
+ if (scale->rank() != 1)
+ return false;
+ if (shift->rank() != 1)
+ return false;
+ // check mul, add attributes
+ if (mul->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (add->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
+ return false;
+
+ // tconv bias should not be set
+ if (not dynamic_cast<luci::CircleOutputExclude *>(tconv->bias()))
+ return false;
+
+ // get weight of tconv
+ auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter());
+ if (not filter)
+ return false;
+ if (filter->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (filter->rank() != 4)
+ return false;
+
+ auto filter_out_chn = filter->dim(0).value();
+ if (filter_out_chn != scale->dim(0).value())
+ return false;
+ if (filter_out_chn != shift->dim(0).value())
+ return false;
+
+ auto name = add->name();
+ assert(name.length() > 0);
+
+ loco::Graph *graph = add->graph();
+ luci::CircleTransposeConv *fused_tconv = graph->nodes()->create<luci::CircleTransposeConv>();
+ luci::CircleConst *fused_filter = graph->nodes()->create<luci::CircleConst>();
+ luci::CircleConst *fused_bias = graph->nodes()->create<luci::CircleConst>();
+
+ auto filter_height = filter->dim(1).value();
+ auto filter_width = filter->dim(2).value();
+ auto filter_in_chn = filter->dim(3).value();
+
+ // Copy filter shape
+ fused_filter->dtype(filter->dtype());
+ fused_filter->size<loco::DataType::FLOAT32>(filter->size<loco::DataType::FLOAT32>());
+ fused_filter->rank(4);
+ fused_filter->dim(0).set(filter_out_chn);
+ fused_filter->dim(1).set(filter_height);
+ fused_filter->dim(2).set(filter_width);
+ fused_filter->dim(3).set(filter_in_chn);
+ fused_filter->shape_status(luci::ShapeStatus::VALID);
+ fused_filter->name(name + "/TransposeConv/filter");
+
+ // fused filter weight = filter weight * mul(scale); add(shift) becomes the new bias below
+ for (uint32_t c = 0; c < filter_out_chn; c++)
+ {
+ for (uint32_t h = 0; h < filter_height; h++)
+ {
+ for (uint32_t w = 0; w < filter_width; w++)
+ {
+ for (uint32_t b = 0; b < filter_in_chn; b++)
+ {
+ uint32_t offset = c * filter_height * filter_width * filter_in_chn +
+ h * filter_width * filter_in_chn + w * filter_in_chn + b;
+ fused_filter->at<loco::DataType::FLOAT32>(offset) =
+ filter->at<loco::DataType::FLOAT32>(offset) * scale->at<loco::DataType::FLOAT32>(c);
+ }
+ }
+ }
+ }
+
+ // Copy fused_bias from shift
+ fused_bias->dtype(shift->dtype());
+ fused_bias->size<loco::DataType::FLOAT32>(shift->size<loco::DataType::FLOAT32>());
+ fused_bias->rank(1);
+ fused_bias->dim(0).set(filter_out_chn);
+ fused_bias->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t c = 0; c < filter_out_chn; ++c)
+ {
+ fused_bias->at<loco::DataType::FLOAT32>(c) = shift->at<loco::DataType::FLOAT32>(c);
+ }
+ fused_bias->name(name + "/TransposeConv/bias");
+
+ // set new tconv properties
+ fused_tconv->inputSizes(tconv->inputSizes());
+ fused_tconv->filter(fused_filter);
+ fused_tconv->outBackprop(tconv->outBackprop());
+ fused_tconv->bias(fused_bias);
+ fused_tconv->padding(tconv->padding());
+ fused_tconv->stride()->h(tconv->stride()->h());
+ fused_tconv->stride()->w(tconv->stride()->w());
+ fused_tconv->name(name + "/TransposeConv");
+ luci::add_origin(fused_tconv,
+ luci::composite_origin(
+ {luci::get_origin(add), luci::get_origin(mul), luci::get_origin(tconv)}));
+
+ if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
+ {
+ // separate relu op from add op
+ auto relu = add->graph()->nodes()->create<luci::CircleRelu6>();
+ relu->features(fused_tconv);
+ relu->name(name + "/Relu6");
+ luci::add_origin(relu, luci::get_origin(add));
+
+ replace(add).with(relu);
+ }
+ else
+ {
+ replace(add).with(fused_tconv);
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseBatchNormWithTConvPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto add = dynamic_cast<luci::CircleAdd *>(node))
+ {
+ if (fused_batch_norm_with_tconv(add))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithTConvPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseBatchNormWithTConvPassTest, name)
+{
+ luci::FuseBatchNormWithTConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
*/
#include "luci/Pass/FuseInstanceNormPass.h"
+#include "helpers/NodeFiller.h"
#include "FuseInstanceNormPassInternal.h"
#include <luci/IR/CircleNodes.h>
-#include <loco/Service/ShapeInference.h>
+#include <luci/Profile/CircleNodeOrigin.h>
#include <cassert>
#include <set>
-// Helper to find commutative node's arguments
-namespace
-{
-
-/**
- * INTRODUCTION
- * Binary operation f(x,y) is 'commutative' when
- * f(x,y) == f(y,x) holds for all x, y.
- * For examples, ADD, MUL and SQUARED_DIFFERENCE are commutative.
- * These helpers make it easy to find commutative arguemnts of commtative node.
- *
- * HOW TO USE
- * COMM_NODE *node;
- * ARG_TYPE_1 *arg1;
- * ARG_TYPE_2 *arg2;
- *
- * bool ok = fill(&arg1, &arg2).with_commutative_args_of(node);
- *
- * Result
- * If 'node's commutative argument types are actually {ARG_TYPE_1, ARG_TYPE_2}
- * (as a set), 'arg1' and 'arg2' set as actual 'node's arguemnts with matching
- * type, and return value 'ok' is true.
- * Otherwise, 'arg1' and 'arg2' not changed, 'ok' is false.
- */
-
-template <class ARG_TYPE_1, class ARG_TYPE_2> class NodeFiller final
-{
-public:
- NodeFiller(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2) : _arg_1(arg_1), _arg_2(arg_2)
- {
- // DO NOTHING
- }
-
- /**
- * @return true When 'node's argument types are 'ARG_TYPE_1' and 'ARG_TYPE_2'
- * In such case, it assign '_arg_1' and '_arg_2' to actual arguments
- *
- * @return false When 'node's argument types are NOT matched with 'ARG_TYPE_*'
- * In such case, it does not amend '_arg_1' and '_arg_2'
- *
- * @require COMM_NODE has member x() and y()
- */
- template <class COMM_NODE> bool with_commutative_args_of(const COMM_NODE *node);
-
-private:
- ARG_TYPE_1 **_arg_1;
- ARG_TYPE_2 **_arg_2;
-};
-
-template <class ARG_TYPE_1, class ARG_TYPE_2>
-inline NodeFiller<ARG_TYPE_1, ARG_TYPE_2> fill(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2)
-{
- return NodeFiller<ARG_TYPE_1, ARG_TYPE_2>{arg_1, arg_2};
-}
-
-template <class ARG_TYPE_1, class ARG_TYPE_2>
-template <class COMM_NODE>
-bool NodeFiller<ARG_TYPE_1, ARG_TYPE_2>::with_commutative_args_of(const COMM_NODE *node)
-{
- // Case 1) X == ARG_TYPE_1 / Y == ARG_TYPE_2
- {
- auto x = dynamic_cast<ARG_TYPE_1 *>(node->x());
- auto y = dynamic_cast<ARG_TYPE_2 *>(node->y());
-
- if (x && y)
- {
- *_arg_1 = x;
- *_arg_2 = y;
- return true;
- }
- }
-
- // Case 2) X == ARG_TYPE_2 / Y == ARG_TYPE_1
- {
- auto x = dynamic_cast<ARG_TYPE_2 *>(node->x());
- auto y = dynamic_cast<ARG_TYPE_1 *>(node->y());
-
- if (x && y)
- {
- *_arg_1 = y;
- *_arg_2 = x;
- return true;
- }
- }
-
- return false;
-}
-
-} // namespace
-
// Helper to check detail
/// @return true When node has shape of '1 x .. x 1 x depth'
//
// CHECK 1) input is rank 4
//
- auto input = mean->input();
- if (not loco::shape_known(input))
+ auto input = loco::must_cast<luci::CircleNode *>(mean->input());
+ if (input->shape_status() != luci::ShapeStatus::VALID)
return false;
- auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
- if (input_shape.rank() != 4)
+ if (input->rank() != 4)
return false;
//
//
// CHECK 1) input is rank 5 (NHWCX)
//
- auto input = mean->input();
- if (not loco::shape_known(input))
+ auto input = loco::must_cast<luci::CircleNode *>(mean->input());
+ if (input->shape_status() != luci::ShapeStatus::VALID)
return false;
- auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
- if (input_shape.rank() != 5)
+ if (input->rank() != 5)
return false;
//
// So it is handled in the separate if statement
if (_pv == PatternVersion::Version_2)
{
- CHECK_OR_FALSE(fill(&mul_gamma, &const_as_beta).with_commutative_args_of(add_as_terminal));
- CHECK_OR_FALSE(fill(&div, &const_as_gamma).with_commutative_args_of(mul_gamma));
+ CHECK_OR_FALSE(
+ luci::fill(&mul_gamma, &const_as_beta).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(luci::fill(&div, &const_as_gamma).with_commutative_args_of(mul_gamma));
sub = dynamic_cast<luci::CircleSub *>(div->x());
CHECK_OR_FALSE(sub);
luci::CircleNode *ifm_node = loco::must_cast<luci::CircleNode *>(ifm);
CHECK_OR_FALSE(ifm_node->rank() == 4);
+ CHECK_OR_FALSE(ifm_node->dim(3).known());
uint32_t ifm_channel_depth = ifm_node->dim(3).value();
mean_of_ifm = dynamic_cast<luci::CircleMean *>(sub->y());
CHECK_OR_FALSE(zero_point_five->at<loco::DataType::FLOAT32>(0) == 0.5);
CHECK_OR_FALSE(
- fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
+ luci::fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
// TODO Support regarding broadcast
CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
loco::Node *ifm_should_be = nullptr;
luci::CircleMean *mean_of_ifm_should_be = nullptr;
- CHECK_OR_FALSE(fill(&ifm_should_be, &mean_of_ifm_should_be).with_commutative_args_of(sqdiff));
+ CHECK_OR_FALSE(
+ luci::fill(&ifm_should_be, &mean_of_ifm_should_be).with_commutative_args_of(sqdiff));
CHECK_OR_FALSE(ifm == ifm_should_be);
CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
if (_pv == PatternVersion::Version_0)
{
- CHECK_OR_FALSE(fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
- CHECK_OR_FALSE(fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
+ CHECK_OR_FALSE(luci::fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(luci::fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
}
if (_pv == PatternVersion::Version_1)
{
- CHECK_OR_FALSE(fill(&mul_as_scaled_reshape, &sub).with_commutative_args_of(add_as_terminal));
CHECK_OR_FALSE(
- fill(&reshape_of_ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_reshape));
+ luci::fill(&mul_as_scaled_reshape, &sub).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(
+ luci::fill(&reshape_of_ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_reshape));
ifm = reshape_of_ifm->tensor();
}
- CHECK_OR_FALSE(loco::shape_known(ifm));
- auto ifm_shape = loco::shape_get(ifm);
- CHECK_OR_FALSE(ifm_shape.domain() == loco::Domain::Tensor);
- auto ifm_tensor_shape = ifm_shape.as<loco::TensorShape>();
- CHECK_OR_FALSE(ifm_tensor_shape.rank() == 4);
- uint32_t ifm_channel_depth = ifm_tensor_shape.dim(3).value();
+ auto ifm_circle = loco::must_cast<luci::CircleNode *>(ifm);
+ CHECK_OR_FALSE(ifm_circle->shape_status() == luci::ShapeStatus::VALID);
+ CHECK_OR_FALSE(ifm_circle->rank() == 4);
+ CHECK_OR_FALSE(ifm_circle->dim(3).known());
+ uint32_t ifm_channel_depth = ifm_circle->dim(3).value();
- CHECK_OR_FALSE(fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma));
+ CHECK_OR_FALSE(luci::fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma));
if (_pv == PatternVersion::Version_0)
{
CHECK_OR_FALSE(add_as_variance);
CHECK_OR_FALSE(
- fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
+ luci::fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
// TODO Support regarding broadcast
if (_pv == PatternVersion::Version_0)
{
loco::Node *ifm_should_be = nullptr;
- CHECK_OR_FALSE(fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff));
+ CHECK_OR_FALSE(luci::fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff));
CHECK_OR_FALSE(ifm == ifm_should_be);
CHECK_OR_FALSE(is_instance_mean_v0(mean_of_ifm));
CHECK_OR_FALSE(ifm == mean_of_ifm->input());
if (_pv == PatternVersion::Version_1)
{
loco::Node *reshape_should_be = nullptr;
- CHECK_OR_FALSE(fill(&reshape_should_be, &mean_of_reshape).with_commutative_args_of(sqdiff));
+ CHECK_OR_FALSE(
+ luci::fill(&reshape_should_be, &mean_of_reshape).with_commutative_args_of(sqdiff));
CHECK_OR_FALSE(reshape_of_ifm == reshape_should_be);
CHECK_OR_FALSE(is_instance_mean_v1(mean_of_reshape));
CHECK_OR_FALSE(reshape_of_ifm == mean_of_reshape->input());
if (_pv == PatternVersion::Version_0)
{
- CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_ifm_should_be)
- .with_commutative_args_of(mul_as_scaled_mean));
+ CHECK_OR_FALSE(luci::fill(&mul_gamma_should_be, &mean_of_ifm_should_be)
+ .with_commutative_args_of(mul_as_scaled_mean));
CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
}
if (_pv == PatternVersion::Version_1)
{
- CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_reshape_should_be)
- .with_commutative_args_of(mul_as_scaled_mean));
+ CHECK_OR_FALSE(luci::fill(&mul_gamma_should_be, &mean_of_reshape_should_be)
+ .with_commutative_args_of(mul_as_scaled_mean));
CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
CHECK_OR_FALSE(mean_of_reshape == mean_of_reshape_should_be);
}
auto graph = p.add_as_terminal->graph();
- // Special case for version 2 (no need to reshape)
- if (p.version() == InstanceNormPattern::Version_2)
+ // Version 0 and 1 need to reshape
+ if (p.version() != InstanceNormPattern::Version_2)
{
- // Make Instance Norm to replace
- auto instance_norm = graph->nodes()->create<luci::CircleInstanceNorm>();
- instance_norm->input(p.ifm);
- instance_norm->gamma(p.const_as_gamma);
- instance_norm->beta(p.const_as_beta);
- float epsilon = p.const_as_epsilon->at<loco::DataType::FLOAT32>(0);
- instance_norm->epsilon(epsilon);
- instance_norm->fusedActivationFunction(p.add_as_terminal->fusedActivationFunction());
-
- replace(p.add_as_terminal).with(instance_norm);
-
- return;
- }
-
- // Make reshape for gamma & beta
- auto reshape_gamma = graph->nodes()->create<luci::CircleReshape>();
- auto reshape_beta = graph->nodes()->create<luci::CircleReshape>();
- {
- auto ifm_shape = loco::shape_get(p.ifm).as<loco::TensorShape>();
- uint32_t ifm_channel_depth = ifm_shape.dim(3).value();
-
- int32_t new_shape[1] = {static_cast<int32_t>(ifm_channel_depth)};
-
- reshape_gamma->tensor(p.const_as_gamma);
- reshape_beta->tensor(p.const_as_beta);
+ p.const_as_gamma->rank(1);
+ p.const_as_gamma->dim(0).set(p.const_as_gamma->size<loco::DataType::FLOAT32>());
+ p.const_as_beta->rank(1);
+ p.const_as_beta->dim(0).set(p.const_as_beta->size<loco::DataType::FLOAT32>());
- luci::set_new_shape(reshape_gamma, new_shape, 1);
- luci::set_new_shape(reshape_beta, new_shape, 1);
+ p.const_as_gamma->shape_status(luci::ShapeStatus::UNDEFINED);
+ p.const_as_beta->shape_status(luci::ShapeStatus::UNDEFINED);
}
// Make Instance Norm to replace
auto instance_norm = graph->nodes()->create<luci::CircleInstanceNorm>();
instance_norm->input(p.ifm);
- instance_norm->gamma(reshape_gamma);
- instance_norm->beta(reshape_beta);
+ instance_norm->gamma(p.const_as_gamma);
+ instance_norm->beta(p.const_as_beta);
float epsilon = p.const_as_epsilon->at<loco::DataType::FLOAT32>(0);
instance_norm->epsilon(epsilon);
instance_norm->fusedActivationFunction(p.add_as_terminal->fusedActivationFunction());
+ // NOTE unique name should be assigned in export
+ instance_norm->name("InstanceNorm");
+
+ // set origin
+ std::vector<std::shared_ptr<luci::CircleNodeOrigin>> origin_vec{
+ luci::get_origin(p.sqdiff),
+ luci::get_origin(p.mean_as_variance),
+ luci::get_origin(p.add_as_variance),
+ luci::get_origin(p.mul_gamma),
+ luci::get_origin(p.sub),
+ luci::get_origin(p.add_as_terminal)};
+ if (p.version() == InstanceNormPattern::PatternVersion::Version_0)
+ {
+ origin_vec.push_back(luci::get_origin(p.mean_of_ifm));
+ origin_vec.push_back(luci::get_origin(p.rsqrt));
+ origin_vec.push_back(luci::get_origin(p.mul_as_scaled_ifm));
+ origin_vec.push_back(luci::get_origin(p.mul_as_scaled_mean));
+ }
+ if (p.version() == InstanceNormPattern::PatternVersion::Version_1)
+ {
+ origin_vec.push_back(luci::get_origin(p.reshape_of_ifm));
+ origin_vec.push_back(luci::get_origin(p.mean_of_reshape));
+ origin_vec.push_back(luci::get_origin(p.rsqrt));
+ origin_vec.push_back(luci::get_origin(p.mul_as_scaled_mean));
+ origin_vec.push_back(luci::get_origin(p.mul_as_scaled_reshape));
+ }
+ if (p.version() == InstanceNormPattern::PatternVersion::Version_2)
+ {
+ origin_vec.push_back(luci::get_origin(p.mean_of_ifm));
+ origin_vec.push_back(luci::get_origin(p.pow));
+ origin_vec.push_back(luci::get_origin(p.div));
+ }
+ luci::add_origin(instance_norm, luci::composite_origin(origin_vec));
replace(p.add_as_terminal).with(instance_norm);
}
#include "FuseInstanceNormPassInternal.h"
+#include "luci/Pass/FuseInstanceNormPass.h"
+
#include <vector>
#include <gtest/gtest.h>
} // namespace
+TEST(FuseInstanceNormPassTest, name)
+{
+ luci::FuseInstanceNormPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
TEST(FuseInstanceNormPass, is_quasi_1D_with_dummy_dim)
{
luci::CircleConst const_node;
#include "luci/Pass/FusePreActivationBatchNormPass.h"
#include "FusePreActivationBatchNormPassInternal.h"
+#include "BatchNormPatternFinder.h"
#include <luci/IR/CircleNodes.h>
#include <luci/Log.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace
{
return true;
}
-// Check if mul is batchnorm mul
-bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node,
- luci::CircleConst *&gamma)
-{
- auto x = dynamic_cast<luci::CircleConst *>(mul->x());
- auto y = dynamic_cast<luci::CircleConst *>(mul->y());
-
- luci::CircleNode *pred = nullptr;
- luci::CircleConst *constant = nullptr;
-
- if (x != nullptr && y == nullptr)
- {
- pred = loco::must_cast<luci::CircleNode *>(mul->y());
- constant = x;
- }
- else if (x == nullptr && y != nullptr)
- {
- pred = loco::must_cast<luci::CircleNode *>(mul->x());
- constant = y;
- }
- else
- {
- return false;
- }
-
- if (constant->rank() != 1)
- return false;
-
- auto channel_dim = constant->dim(0);
- if (!(channel_dim == mul->dim(mul->rank() - 1)))
- return false;
-
- pred_node = pred;
- gamma = constant;
- return true;
-}
-
-// Check if add is batchnorm add
-bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::CircleConst *&beta)
-{
- auto x = loco::must_cast<luci::CircleNode *>(add->x());
- auto y = loco::must_cast<luci::CircleNode *>(add->y());
-
- luci::CircleMul *pred = nullptr;
- luci::CircleConst *constant = nullptr;
-
- if (add->fusedActivationFunction() != luci::FusedActFunc::RELU)
- return false;
-
- if (x->opcode() == luci::CircleOpcode::CIRCLECONST && y->opcode() == luci::CircleOpcode::MUL)
- {
- pred = loco::must_cast<luci::CircleMul *>(y);
- constant = loco::must_cast<luci::CircleConst *>(x);
- }
- else if (x->opcode() == luci::CircleOpcode::MUL && y->opcode() == luci::CircleOpcode::CIRCLECONST)
- {
- pred = loco::must_cast<luci::CircleMul *>(x);
- constant = loco::must_cast<luci::CircleConst *>(y);
- }
- else
- {
- return false;
- }
-
- if (constant->rank() != 1)
- return false;
-
- auto channel_dim = constant->dim(0);
- // Assumption: Layout is channel-last
- if (!(channel_dim == add->dim(add->rank() - 1)))
- return false;
-
- mul = pred;
- beta = constant;
- return true;
-}
-
const luci::CircleConv2D *get_forward_conv2d(const luci::CircleNode *node, uint32_t channel_size)
{
auto opcode = node->opcode();
auto size = beta->dim(0).value();
auto bias = dynamic_cast<luci::CircleConst *>(conv->bias());
+ auto name = conv->name();
+ assert(name.length() > 0);
+
if (bias == nullptr)
{
bias = conv->graph()->nodes()->create<luci::CircleConst>();
bias->rank(1);
bias->dim(0).set(size);
bias->size<loco::DataType::FLOAT32>(size);
+ bias->name(name + "/bias");
conv->bias(bias);
}
else
luci::CircleSub *insert_sub(luci::CircleNode *pred, luci::CircleConst *beta)
{
+ auto name = pred->name();
+ assert(name.length() > 0);
+
auto sub = pred->graph()->nodes()->create<luci::CircleSub>();
- sub->dtype(loco::DataType::FLOAT32);
- sub->rank(pred->rank());
- for (uint32_t i = 0; i < sub->rank(); i++)
- {
- sub->dim(i).set(pred->dim(i).value());
- }
sub->fusedActivationFunction(luci::FusedActFunc::NONE);
+ sub->name(name + "/Sub");
loco::replace(pred).with(sub);
if (!update_conv_bias_with_beta(conv, beta, false))
return false;
+ luci::add_origin(conv, luci::get_origin(sub));
+
auto pred = sub->x();
loco::replace(sub).with(pred);
if (!update_conv_bias_with_beta(conv, beta, true))
return false;
+ luci::add_origin(conv, luci::get_origin(add));
loco::replace(add).with(pred);
add->drop();
if (!update_conv_bias_with_beta(conv, beta, true))
return false;
+ luci::add_origin(conv, luci::get_origin(add));
+
auto relu = *loco::succs(add).begin();
auto relu_node = loco::must_cast<luci::CircleRelu *>(relu);
assert(relu_node != nullptr);
add->drop();
sub_list.push_back(insert_sub(pred, beta));
+ luci::add_origin(sub_list.back(), luci::get_origin(add));
relu_node->features(pred);
// Update CONV weights
update_conv_weights_with_gamma(conv, gamma);
+
+ // Update origin
+ // TODO need to remove const
+ luci::add_origin(const_cast<luci::CircleConv2D *>(conv),
+ luci::get_origin(loco::must_cast<luci::CircleNode *>(mul)));
}
loco::replace(mul).with(pred_node);
if (!is_batchnorm_add(add, mul, beta))
return false;
+ if (add->fusedActivationFunction() != luci::FusedActFunc::RELU)
+ return false;
if (loco::succs(mul).size() != 1)
return false;
return false;
// Insert Relu at the bottom
+ auto name = add->name();
+ assert(name.length() > 0);
+
auto relu = add->graph()->nodes()->create<luci::CircleRelu>();
relu->features(mul);
+ relu->name(name + "/Relu");
+ luci::add_origin(relu, luci::get_origin(add));
loco::replace(add).with(relu);
// Replace beta <- beta / gamma
#include "FusePreActivationBatchNormPassInternal.h"
+#include "luci/Pass/FusePreActivationBatchNormPass.h"
+
#include <luci/IR/CircleNodes.h>
#include <math.h>
conv_filter->at<loco::DataType::FLOAT32>(i * out_size + j) = i * out_size + j;
}
}
+
+ pred_conv->name("pred_conv");
+ pred_conv_filter->name("pred_conv_filter");
+ pred_conv_bias->name("pred_conv_bias");
+ pred_conv2->name("pred_conv2");
+ pred_conv2_filter->name("pred_conv2_filter");
+ pred_conv2_bias->name("pred_conv2_bias");
+ pred_add->name("pred_add");
+ mul->name("mul");
+ mul_gamma->name("mul_gamma");
+ add->name("add");
+ add_beta->name("add_beta");
+ conv->name("conv");
+ conv_filter->name("conv_filter");
+ conv_bias->name("conv_bias");
+ succ_add->name("succ_add");
}
public:
} // namespace
+TEST(FusePreActivationBatchNormPassTest, name)
+{
+ luci::FusePreActivationBatchNormPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
TEST(FusePreActivationBatchNorm, swap_mul_add)
{
SimpleGraph g;
#include "luci/Pass/MakeBatchNormGammaPositivePass.h"
+#include "BatchNormPatternFinder.h"
+
#include <luci/IR/CircleNodes.h>
namespace
return changed;
}
-// Check if add is batchnorm add
-bool is_batchnorm_add(const luci::CircleAdd *add)
+bool make_positive_gamma(luci::CircleAdd *add)
{
- auto x = dynamic_cast<luci::CircleConst *>(add->x());
- auto y = dynamic_cast<luci::CircleConst *>(add->y());
-
- luci::CircleConst *constant = nullptr;
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+ luci::CircleConst *gamma = nullptr;
+ luci::CircleNode *pred = nullptr;
- if (x != nullptr && y == nullptr)
- constant = x;
- else if (x == nullptr && y != nullptr)
- constant = y;
- else
+ if (!is_batchnorm_add(add, mul, beta))
return false;
- if (constant->rank() != 1)
+ if (loco::succs(mul).size() != 1)
return false;
+ if (!is_batchnorm_mul(mul, pred, gamma))
+ return false;
+ assert(pred == add);
// Only support Relu
if (add->fusedActivationFunction() != luci::FusedActFunc::RELU)
return false;
- auto channel_dim = constant->dim(0);
- if (!(channel_dim == add->dim(add->rank() - 1)))
- return false;
-
- return true;
-}
-
-// Check if mul is batchnorm mul
-bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleConst *&gamma)
-{
- auto x = dynamic_cast<luci::CircleConst *>(mul->x());
- auto y = dynamic_cast<luci::CircleConst *>(mul->y());
-
- luci::CircleConst *constant = nullptr;
-
- if (x != nullptr && y == nullptr)
- constant = x;
- else if (x == nullptr && y != nullptr)
- constant = y;
- else
- return false;
-
- if (constant->rank() != 1)
- return false;
-
- auto channel_dim = constant->dim(0);
- if (!(channel_dim == mul->dim(mul->rank() - 1)))
- return false;
-
- // Check successor is batchnorm add
- auto succs = loco::succs(mul);
- if (succs.size() != 1)
- return false;
-
- auto add = dynamic_cast<luci::CircleAdd *>(*succs.begin());
- if (add == nullptr)
- return false;
-
- if (!is_batchnorm_add(add))
- return false;
-
- gamma = constant;
- return true;
+ return negative_gamma_to_positive(gamma);
}
} // namespace
namespace luci
{
+/**
+ * Convert negative gamma values of Mul-Add (as BatchNorm) to a small positive value (1e-10)
+ *
+ * PATTERN:
+ *          |
+ *     [CircleNode]  [CircleConst](as gamma)
+ *            \          /
+ *            [CircleMul]  [CircleConst](as beta)
+ *                 \          /
+ *                 [CircleAdd]
+ *                      |
+ */
bool MakeBatchNormGammaPositivePass::run(loco::Graph *g)
{
bool changed = false;
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- auto mul = dynamic_cast<luci::CircleMul *>(node);
- if (mul == nullptr)
+ auto add = dynamic_cast<luci::CircleAdd *>(node);
+ if (add == nullptr)
continue;
- luci::CircleConst *gamma;
- if (is_batchnorm_mul(mul, gamma))
- changed = negative_gamma_to_positive(gamma);
+ if (make_positive_gamma(add))
+ changed = true;
}
return changed;
}
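A minimal sketch of the gamma adjustment this pass applies, using a plain vector instead of a CircleConst (the 1e-10 value follows the comment above; the helper name below is illustrative, not the pass's actual negative_gamma_to_positive):

#include <vector>

// Sketch: replace each negative gamma value with a small positive epsilon and
// report whether anything changed, mirroring what the pass does on the const node.
bool make_gamma_positive(std::vector<float> &gamma)
{
  bool changed = false;
  for (auto &g : gamma)
  {
    if (g < 0.0f)
    {
      g = 1e-10f;
      changed = true;
    }
  }
  return changed;
}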
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/MakeBatchNormGammaPositivePass.h"
+
+#include <gtest/gtest.h>
+
+TEST(MakeBatchNormGammaPositivePassTest, name)
+{
+ luci::MakeBatchNormGammaPositivePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/MigrateLegacyShapeDtypePass.h"
-
-#include <loco/Service/ShapeInference.h>
-#include <loco/Service/TypeInference.h>
-
-#include <luci/IR/CircleNodes.h>
-
-#include <loco.h>
-
-namespace
-{
-
-bool has_same_shape(luci::CircleNode *node, loco::TensorShape shape)
-{
- if (node->rank() != shape.rank())
- return false;
-
- for (uint32_t i = 0; i < shape.rank(); ++i)
- if (!(node->dim(i) == shape.dim(i)))
- return false;
-
- return true;
-}
-
-} // namespace
-
-namespace luci
-{
-
-bool MigrateLegacyShapeDtypePass::run(luci::Module *m)
-{
- bool changed = false;
-
- for (size_t g = 0; g < m->size(); ++g)
- {
- if (run(m->graph(g)))
- changed = true;
- }
-
- return changed;
-}
-
-bool MigrateLegacyShapeDtypePass::run(loco::Graph *g)
-{
- bool changed = false;
-
- for (auto node : loco::all_nodes(g))
- {
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- if (loco::shape_known(node))
- {
- auto loco_shape = loco::shape_get(node).as<loco::TensorShape>();
-
- assert(circle_node->shape_signature().rank() == 0 ||
- circle_node->shape_signature().rank() == loco_shape.rank());
-
- // When shape of loco is copied to circle node, ShapeSignature should be applied.
- loco::TensorShape new_shape;
- new_shape.rank(loco_shape.rank());
- for (uint32_t i = 0; i < loco_shape.rank(); ++i)
- {
- if (circle_node->shape_signature().rank() > 0 &&
- circle_node->shape_signature().dim(i) == -1)
- new_shape.dim(i) = 1;
- else
- new_shape.dim(i) = loco_shape.dim(i);
- }
-
- if (circle_node->shape_status() == luci::ShapeStatus::UNDEFINED ||
- !has_same_shape(circle_node, new_shape))
- {
- circle_node->rank(new_shape.rank());
- for (uint32_t i = 0; i < new_shape.rank(); ++i)
- circle_node->dim(i) = new_shape.dim(i);
-
- if (circle_node->shape_status() == luci::ShapeStatus::UNDEFINED)
- circle_node->shape_status(luci::ShapeStatus::VALID);
-
- changed = true;
- }
- }
-
- if (loco::dtype_known(node))
- {
- if (loco::dtype_get(node) != circle_node->dtype())
- {
- circle_node->dtype(loco::dtype_get(node));
- changed = true;
- }
- }
- }
-
- return changed;
-}
-
-} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModulePhase.h"
+
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+TEST(ModulePhaseTest, saturate)
+{
+ auto m = luci::make_module();
+ auto g = loco::make_graph();
+ m->add(std::move(g));
+
+ luci::Phase phase;
+
+ // Any Pass will do for testing
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+
+ luci::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{m.get()};
+ phase_runner.run(phase);
+
+ SUCCEED();
+}
+
+TEST(ModulePhaseTest, restart)
+{
+ auto m = luci::make_module();
+ auto g = loco::make_graph();
+ m->add(std::move(g));
+
+ luci::Phase phase;
+
+ // Any Pass will do for testing
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+
+ luci::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{m.get()};
+ phase_runner.run(phase);
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_TEST_GRAPHS_H__
+#define __LUCI_PASS_TEST_GRAPHS_H__
+
+#include <loco.h>
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+
+/**
+ * ConstantFoldingTestGraph is a base class for testing
+ * constant folding passes. It creates the Input and Output
+ * of the graph below. Child classes must implement the Connector
+ * and the Folded pattern.
+ *
+ *    [Input]   [Folded pattern] (Implemented by child class)
+ *        \         /
+ *       [Connector] (Implemented by child class)
+ *            |
+ *        [Output]
+ *
+ * Connector should satisfy the below conditions
+ * - Input type == Output type == Folded pattern type
+ * - Input shape == Output shape == Folded pattern shape
+ *
+ * For example, Add, Mul, Sub, .. can be a Connector
+ */
+class ConstantFoldingTestGraph
+{
+public:
+ ConstantFoldingTestGraph(std::vector<uint32_t> input_shape, loco::DataType input_dtype)
+ {
+ _input = _g.nodes()->create<luci::CircleInput>();
+ _output = _g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = _g.inputs()->create();
+ _input->index(graph_input->index());
+ auto graph_output = _g.outputs()->create();
+ _output->index(graph_output->index());
+
+ graph_input->dtype(input_dtype);
+ graph_output->dtype(input_dtype);
+ _input->dtype(input_dtype);
+ _output->dtype(input_dtype);
+
+ auto input_tensor_shape = std::make_unique<loco::TensorShape>();
+ input_tensor_shape->rank(input_shape.size());
+ for (int i = 0; i < input_shape.size(); i++)
+ input_tensor_shape->dim(i).set(input_shape[i]);
+ graph_input->shape(std::move(input_tensor_shape));
+
+ auto output_tensor_shape = std::make_unique<loco::TensorShape>();
+ output_tensor_shape->rank(input_shape.size());
+ for (int i = 0; i < input_shape.size(); i++)
+ output_tensor_shape->dim(i).set(input_shape[i]);
+ graph_output->shape(std::move(output_tensor_shape));
+
+ _input->rank(input_shape.size());
+ for (int i = 0; i < input_shape.size(); i++)
+ _input->dim(i).set(input_shape[i]);
+
+ _output->rank(input_shape.size());
+ for (int i = 0; i < input_shape.size(); i++)
+ _output->dim(i).set(input_shape[i]);
+
+ _input->name("input");
+ _output->name("output");
+ }
+
+ virtual void init() = 0;
+
+ virtual ~ConstantFoldingTestGraph() = default;
+
+ virtual loco::Node *createFoldedPattern() = 0;
+
+ virtual luci::CircleConst *getFoldedPattern() = 0;
+
+ loco::Graph *graph() { return &_g; }
+
+ // NOTE these members keep the '_' prefix and are protected so that child test graphs can access them
+protected:
+ loco::Graph _g;
+ luci::CircleInput *_input = nullptr;
+ luci::CircleOutput *_output = nullptr;
+};
+
+/**
+ * ConstantFoldingAddTestGraph is a ConstantFoldingTestGraph
+ * whose Connector is Add.
+ */
+class ConstantFoldingAddTestGraph : public ConstantFoldingTestGraph
+{
+protected:
+ ConstantFoldingAddTestGraph(std::vector<uint32_t> input_shape, loco::DataType input_dtype)
+ : ConstantFoldingTestGraph(input_shape, input_dtype)
+ {
+ _add = _g.nodes()->create<luci::CircleAdd>();
+ _add->dtype(input_dtype);
+
+ _add->rank(input_shape.size());
+ for (int i = 0; i < input_shape.size(); i++)
+ _add->dim(i).set(input_shape[i]);
+
+ _add->x(_input);
+
+ _output->from(_add);
+
+ _add->name("add");
+ }
+
+protected:
+ void init() override { _add->y(createFoldedPattern()); }
+
+protected:
+ luci::CircleConst *getFoldedPattern() override
+ {
+ return dynamic_cast<luci::CircleConst *>(_add->y());
+ }
+
+protected:
+ luci::CircleAdd *_add = nullptr;
+};
+
+} // namespace luci
+
+#endif // __LUCI_PASS_TEST_GRAPHS_H__
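As a usage sketch (not part of the patch), a unit test would typically derive from ConstantFoldingAddTestGraph and only supply the folded pattern. The class name FoldedConstGraph and the scalar constant below are illustrative assumptions:

// Sketch, assuming the header above is included by the test source.
// The folded pattern here is a single scalar constant feeding the Add connector.
class FoldedConstGraph : public luci::ConstantFoldingAddTestGraph
{
public:
  FoldedConstGraph() : luci::ConstantFoldingAddTestGraph({1}, loco::DataType::FLOAT32) {}

  // re-expose the protected init() so test code can call it directly
  using luci::ConstantFoldingAddTestGraph::init;

  loco::Node *createFoldedPattern() override
  {
    auto c = _g.nodes()->create<luci::CircleConst>();
    c->dtype(loco::DataType::FLOAT32);
    c->rank(1);
    c->dim(0).set(1);
    c->size<loco::DataType::FLOAT32>(1);
    c->at<loco::DataType::FLOAT32>(0) = 1.0f;
    c->name("folded_const");
    return c;
  }
};

A test would construct the graph, call init(), run the folding pass on graph(), and then inspect the folded constant through the ConstantFoldingTestGraph interface.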
{
public:
ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
- : _graph{graph}, _strategy{strategy}
+ : _graph{graph}, _strategy{strategy}
{
// DO NOTHING
}
{
public:
ModuleProgressReporter(luci::Module *module, logo::PhaseStrategy strategy)
- : _module{module}, _strategy{strategy}
+ : _module{module}, _strategy{strategy}
{
// DO NOTHING
}
public:
ConstInputConcatGraph(loco::DataType quant_type)
{
- concat_node.dtype(quant_type);
- concat_node.fusedActivationFunction(luci::FusedActFunc::NONE);
- input_1.dtype(loco::DataType::FLOAT32);
- input_1.size<loco::DataType::FLOAT32>(5);
+ concat_node = g.nodes()->create<luci::CircleConcatenation>(2);
+ input_1 = g.nodes()->create<luci::CircleConst>();
+ input_2 = g.nodes()->create<luci::CircleConv2D>();
+
+ concat_node->dtype(quant_type);
+ concat_node->fusedActivationFunction(luci::FusedActFunc::NONE);
+ input_1->dtype(loco::DataType::FLOAT32);
+ input_1->size<loco::DataType::FLOAT32>(5);
for (int i = 0; i < 5; i++)
{
// Set data {-2, -1, 0, 1, 2}
- input_1.at<loco::DataType::FLOAT32>(i) = i - 2.0;
+ input_1->at<loco::DataType::FLOAT32>(i) = i - 2.0;
}
- input_2.dtype(quant_type);
+ input_2->dtype(quant_type);
- concat_node.values(0, &input_1);
- concat_node.values(1, &input_2);
+ concat_node->values(0, input_1);
+ concat_node->values(1, input_2);
if (quant_type == loco::DataType::U8)
{
- addQuantParam(concat_node, {0.1}, {10});
- addQuantParam(input_2, {2.0}, {2});
+ addQuantParam(*concat_node, {0.1}, {10});
+ addQuantParam(*input_2, {2.0}, {2});
}
else if (quant_type == loco::DataType::S16)
{
- addQuantParam(concat_node, {0.1}, {0});
- addQuantParam(input_2, {2.0}, {0});
+ addQuantParam(*concat_node, {0.1}, {0});
+ addQuantParam(*input_2, {2.0}, {0});
}
else
{
}
}
- ~ConstInputConcatGraph()
- {
- concat_node.values(0, nullptr);
- concat_node.values(1, nullptr);
- }
-
public:
- luci::CircleConcatenation concat_node{2};
- luci::CircleConst input_1;
- luci::CircleConv2D input_2;
+ loco::Graph g;
+ luci::CircleConcatenation *concat_node = nullptr;
+ luci::CircleConst *input_1 = nullptr;
+ luci::CircleConv2D *input_2 = nullptr;
};
} // namespace
// input_1 is const. const values are quantized with the qparam of concat
ConstInputConcatGraph cg(loco::DataType::U8);
- luci::propagate_concat_quantparam(&cg.concat_node, loco::DataType::U8);
- EXPECT_FLOAT_EQ(0.1, cg.concat_node.quantparam()->scale[0]);
- EXPECT_EQ(10, cg.concat_node.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(0.1, cg.input_1.quantparam()->scale[0]);
- EXPECT_EQ(10, cg.input_1.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(0.1, cg.input_2.quantparam()->scale[0]);
- EXPECT_EQ(10, cg.input_2.quantparam()->zerop[0]);
- EXPECT_EQ(loco::DataType::U8, cg.input_1.dtype());
- EXPECT_EQ(0, cg.input_1.at<loco::DataType::U8>(0));
- EXPECT_EQ(0, cg.input_1.at<loco::DataType::U8>(1));
- EXPECT_EQ(10, cg.input_1.at<loco::DataType::U8>(2));
- EXPECT_EQ(20, cg.input_1.at<loco::DataType::U8>(3));
- EXPECT_EQ(30, cg.input_1.at<loco::DataType::U8>(4));
+ luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::U8);
+ EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
+ EXPECT_EQ(10, cg.concat_node->quantparam()->zerop[0]);
+ const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
+ EXPECT_FLOAT_EQ(0.1, cg_input_1->quantparam()->scale[0]);
+ EXPECT_EQ(10, cg_input_1->quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(0.1, cg.input_2->quantparam()->scale[0]);
+ EXPECT_EQ(10, cg.input_2->quantparam()->zerop[0]);
+ EXPECT_EQ(loco::DataType::U8, cg_input_1->dtype());
+ EXPECT_EQ(0, cg_input_1->at<loco::DataType::U8>(0));
+ EXPECT_EQ(0, cg_input_1->at<loco::DataType::U8>(1));
+ EXPECT_EQ(10, cg_input_1->at<loco::DataType::U8>(2));
+ EXPECT_EQ(20, cg_input_1->at<loco::DataType::U8>(3));
+ EXPECT_EQ(30, cg_input_1->at<loco::DataType::U8>(4));
}
TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8_NEG)
// concat has fused activation function and input_1 is const.
// const values are quantized using its min/max
ConstInputConcatGraph cg(loco::DataType::U8);
- cg.concat_node.fusedActivationFunction(luci::FusedActFunc::RELU);
- luci::propagate_concat_quantparam(&cg.concat_node, loco::DataType::U8);
- EXPECT_FLOAT_EQ(0.1, cg.concat_node.quantparam()->scale[0]);
- EXPECT_EQ(10, cg.concat_node.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(0.015686275, cg.input_1.quantparam()->scale[0]);
- EXPECT_EQ(128, cg.input_1.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(2.0, cg.input_2.quantparam()->scale[0]);
- EXPECT_EQ(2, cg.input_2.quantparam()->zerop[0]);
- EXPECT_EQ(loco::DataType::U8, cg.input_1.dtype());
- EXPECT_EQ(quantize(-2, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::U8>(0));
- EXPECT_EQ(quantize(-1, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::U8>(1));
- EXPECT_EQ(quantize(0, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::U8>(2));
- EXPECT_EQ(quantize(1, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::U8>(3));
- EXPECT_EQ(quantize(2, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::U8>(4));
+ cg.concat_node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::U8);
+ EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
+ EXPECT_EQ(10, cg.concat_node->quantparam()->zerop[0]);
+ const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
+ EXPECT_FLOAT_EQ(0.015686275, cg_input_1->quantparam()->scale[0]);
+ EXPECT_EQ(128, cg_input_1->quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(2.0, cg.input_2->quantparam()->scale[0]);
+ EXPECT_EQ(2, cg.input_2->quantparam()->zerop[0]);
+ EXPECT_EQ(loco::DataType::U8, cg_input_1->dtype());
+ EXPECT_EQ(quantize(-2, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(0));
+ EXPECT_EQ(quantize(-1, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(1));
+ EXPECT_EQ(quantize(0, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(2));
+ EXPECT_EQ(quantize(1, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(3));
+ EXPECT_EQ(quantize(2, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(4));
}
TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16)
// input_1 is const. const values are quantized with the qparam of concat
ConstInputConcatGraph cg(loco::DataType::S16);
- luci::propagate_concat_quantparam(&cg.concat_node, loco::DataType::S16);
- EXPECT_FLOAT_EQ(0.1, cg.concat_node.quantparam()->scale[0]);
- EXPECT_EQ(0, cg.concat_node.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(0.1, cg.input_1.quantparam()->scale[0]);
- EXPECT_EQ(0, cg.input_1.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(0.1, cg.input_2.quantparam()->scale[0]);
- EXPECT_EQ(0, cg.input_2.quantparam()->zerop[0]);
- EXPECT_EQ(loco::DataType::S16, cg.input_1.dtype());
- EXPECT_EQ(-20, cg.input_1.at<loco::DataType::S16>(0));
- EXPECT_EQ(-10, cg.input_1.at<loco::DataType::S16>(1));
- EXPECT_EQ(0, cg.input_1.at<loco::DataType::S16>(2));
- EXPECT_EQ(10, cg.input_1.at<loco::DataType::S16>(3));
- EXPECT_EQ(20, cg.input_1.at<loco::DataType::S16>(4));
+ luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::S16);
+ EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg.concat_node->quantparam()->zerop[0]);
+ const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
+ EXPECT_FLOAT_EQ(0.1, cg_input_1->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg_input_1->quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(0.1, cg.input_2->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg.input_2->quantparam()->zerop[0]);
+ EXPECT_EQ(loco::DataType::S16, cg_input_1->dtype());
+ EXPECT_EQ(-20, cg_input_1->at<loco::DataType::S16>(0));
+ EXPECT_EQ(-10, cg_input_1->at<loco::DataType::S16>(1));
+ EXPECT_EQ(0, cg_input_1->at<loco::DataType::S16>(2));
+ EXPECT_EQ(10, cg_input_1->at<loco::DataType::S16>(3));
+ EXPECT_EQ(20, cg_input_1->at<loco::DataType::S16>(4));
}
TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16_NEG)
// concat has fused activation function and input_1 is const.
// const values are quantized using its min/max
ConstInputConcatGraph cg(loco::DataType::S16);
- cg.concat_node.fusedActivationFunction(luci::FusedActFunc::RELU);
- luci::propagate_concat_quantparam(&cg.concat_node, loco::DataType::S16);
- EXPECT_FLOAT_EQ(0.1, cg.concat_node.quantparam()->scale[0]);
- EXPECT_EQ(0, cg.concat_node.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(0.000061037, cg.input_1.quantparam()->scale[0]);
- EXPECT_EQ(0, cg.input_1.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(2.0, cg.input_2.quantparam()->scale[0]);
- EXPECT_EQ(0, cg.input_2.quantparam()->zerop[0]);
- EXPECT_EQ(loco::DataType::S16, cg.input_1.dtype());
- EXPECT_EQ(quantize(-2, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::S16>(0));
- EXPECT_EQ(quantize(-1, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::S16>(1));
- EXPECT_EQ(quantize(0, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::S16>(2));
- EXPECT_EQ(quantize(1, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::S16>(3));
- EXPECT_EQ(quantize(2, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::S16>(4));
+ cg.concat_node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::S16);
+ EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg.concat_node->quantparam()->zerop[0]);
+ const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
+ EXPECT_FLOAT_EQ(0.000061037, cg_input_1->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg_input_1->quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(2.0, cg.input_2->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg.input_2->quantparam()->zerop[0]);
+ EXPECT_EQ(loco::DataType::S16, cg_input_1->dtype());
+ EXPECT_EQ(quantize(-2, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(0));
+ EXPECT_EQ(quantize(-1, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(1));
+ EXPECT_EQ(quantize(0, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(2));
+ EXPECT_EQ(quantize(1, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(3));
+ EXPECT_EQ(quantize(2, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(4));
}
INFO(l) << "PropagateQuantParamPass visit node: " << circle_node->name() << std::endl;
PropagateQuantParam pqp;
- changed = circle_node->accept(&pqp);
- if (changed)
- break;
+ if (circle_node->accept(&pqp))
+ changed = true;
}
return changed;
} // namespace
+TEST(PropagateQuantParamPassTest, name)
+{
+ luci::PropagateQuantParamPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
TEST(PropagateQuantParam, simple)
{
SimpleGraph g;
data = data < nudged_min ? nudged_min : data;
data = data > nudged_max ? nudged_max : data;
quantized_values[i] =
- static_cast<int32_t>(std::round((data - nudged_min) * scaling_factor_inv));
+ static_cast<int32_t>(std::round((data - nudged_min) * scaling_factor_inv));
}
node->dtype(loco::DataType::U8); // change the type of tensor
for (uint32_t i = 0; i < size; ++i)
{
node->at<loco::DataType::S16>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
}
}
void compute_sym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
float &nudged_min, float &nudged_max)
{
- assert(min != max);
+ assert(min <= max);
const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
const int32_t kMinScale = -kMaxScale;
scale_factor_from_max_side = rmax / qmax_double;
scaling_factor = scale_factor_from_min_side > scale_factor_from_max_side
- ? scale_factor_from_min_side
- : scale_factor_from_max_side;
+ ? scale_factor_from_min_side
+ : scale_factor_from_max_side;
zp = 0;
nudged_min = static_cast<float>(qmin_double * scaling_factor);
nudged_max = static_cast<float>(qmax_double * scaling_factor);
zp = nudged_zero_point;
}
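A quick worked example of the symmetric scale selection shown above, as a standalone sketch with illustrative numbers (assuming the 16-bit symmetric range of +/-32767 used by kMaxScale):

#include <algorithm>
#include <cassert>

int main()
{
  // For min = -3.0 and max = 6.0, the max side wins:
  // scale = max(3/32767, 6/32767) = 6/32767 (~1.83e-4),
  // so the nudged range becomes [-6.0, 6.0] and the zero-point stays 0.
  const double qmax = 32767.0;
  const double rmin = -3.0, rmax = 6.0;
  const double scale = std::max(-rmin / qmax, rmax / qmax);
  assert(scale == rmax / qmax);
  const double nudged_min = -qmax * scale; // -6.0
  const double nudged_max = qmax * scale;  //  6.0
  (void)nudged_min;
  (void)nudged_max;
  return 0;
}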
-bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension, int &channel_dim_index)
+bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension,
+ int32_t &channel_dim_index)
{
auto succs = loco::succs(node);
uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices)
{
return indices[0] * dimension.dim(1).value() * dimension.dim(2).value() *
- dimension.dim(3).value() +
+ dimension.dim(3).value() +
indices[1] * dimension.dim(2).value() * dimension.dim(3).value() +
indices[2] * dimension.dim(3).value() + indices[3];
}
float &scaling_factor, int64_t &zp, float &nudged_min,
float &nudged_max);
-bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension, int &channel_dim_index);
+bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension,
+ int32_t &channel_dim_index);
uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices);
#include <iostream>
#include <cmath>
-
-namespace luci
-{
+#include <functional>
namespace
{
-void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max)
+using namespace luci;
+using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
+
+void iterate_per_channel(CircleConst *node, IterFunc func)
{
loco::TensorShape dimension;
dimension.rank(4);
uint32_t indices[4] = {
- 0,
+ 0,
};
- int channel_dim_index{0};
- int size{0};
+ int32_t channel_dim_index{0};
if (!get_channel_dim_index(node, dimension, channel_dim_index))
{
assert(false);
return;
}
- size = dimension.dim(channel_dim_index).value();
- std::vector<bool> has_min_max_value(size, false);
- min.resize(size);
- max.resize(size);
for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
{
for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
{
for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
{
- int channel_idx = indices[channel_dim_index];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- if (has_min_max_value[channel_idx])
- {
- min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
- max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
- }
- else
- {
- min[channel_idx] = data;
- max[channel_idx] = data;
- has_min_max_value[channel_idx] = true;
- }
+ func(indices, dimension, channel_dim_index);
}
}
}
}
}
+} // namespace
+
+namespace luci
+{
+
+namespace
+{
+
+void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max)
+{
+ loco::TensorShape dimension;
+ dimension.rank(4);
+ int32_t channel_dim_index{0};
+
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ assert(false);
+ return;
+ }
+ auto size = dimension.dim(channel_dim_index).value();
+
+ std::vector<bool> has_min_max_value(size, false);
+ min.resize(size);
+ max.resize(size);
+
+ auto cal_minmax = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ if (has_min_max_value[channel_idx])
+ {
+ min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
+ max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
+ }
+ else
+ {
+ min[channel_idx] = data;
+ max[channel_idx] = data;
+ has_min_max_value[channel_idx] = true;
+ }
+ };
+
+ iterate_per_channel(node, cal_minmax);
+}
+
void sym_wquant_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
std::vector<float> &scaling_factor, std::vector<int64_t> &zp,
std::vector<float> &nudged_min, std::vector<float> &nudged_max)
compute_sym_scale_zp(min[i], max[i], scaling_factor[i], zp[i], nudged_min[i], nudged_max[i]);
}
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+ data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round(data * scaling_factor_inv));
};
- int channel_dim_index{0};
-
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
- data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
- quantized_values[cal_offset(dimension, indices)] =
- static_cast<int32_t>(std::round(data * scaling_factor_inv));
- }
- }
- }
- }
+ iterate_per_channel(node, quantize);
node->dtype(loco::DataType::S16); // change the type of tensor
node->size<loco::DataType::S16>(size); // resize tensor
for (uint32_t i = 0; i < size; ++i)
{
node->at<loco::DataType::S16>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
}
}
uint32_t size = node->size<loco::DataType::S16>();
std::vector<float> dequantized_values(size);
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto dequantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ auto data = node->at<loco::DataType::S16>(cal_offset(dimension, indices));
+ dequantized_values[cal_offset(dimension, indices)] =
+ static_cast<float>(data) * scaling_factor[channel_idx];
};
- int channel_dim_index{0};
-
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- auto data = node->at<loco::DataType::S16>(cal_offset(dimension, indices));
- dequantized_values[cal_offset(dimension, indices)] =
- static_cast<float>(data) * scaling_factor[channel_idx];
- }
- }
- }
- }
+ iterate_per_channel(node, dequantize);
node->dtype(loco::DataType::FLOAT32); // change the type of tensor
node->size<loco::DataType::FLOAT32>(size); // resize tensor
compute_asym_scale_zp(min[i], max[i], scaling_factor[i], zp[i], nudged_min[i], nudged_max[i]);
}
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+ data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round((data - nudged_min[channel_idx]) * scaling_factor_inv));
};
- int channel_dim_index{0};
-
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
- data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
- quantized_values[cal_offset(dimension, indices)] = static_cast<int32_t>(
- std::round((data - nudged_min[channel_idx]) * scaling_factor_inv));
- }
- }
- }
- }
+ iterate_per_channel(node, quantize);
node->dtype(loco::DataType::U8); // change the type of tensor
node->size<loco::DataType::U8>(size); // resize tensor
uint32_t size = node->size<loco::DataType::U8>();
std::vector<float> dequantized_values(size);
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto dequantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ auto data = node->at<loco::DataType::U8>(cal_offset(dimension, indices));
+ dequantized_values[cal_offset(dimension, indices)] =
+ static_cast<float>(data) * scaling_factor[channel_idx] + nudged_min[channel_idx];
};
- int channel_dim_index{0};
-
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- auto data = node->at<loco::DataType::U8>(cal_offset(dimension, indices));
- dequantized_values[cal_offset(dimension, indices)] =
- static_cast<float>(data) * scaling_factor[channel_idx] + nudged_min[channel_idx];
- }
- }
- }
- }
+ iterate_per_channel(node, dequantize);
node->dtype(loco::DataType::FLOAT32); // change the type of tensor
node->size<loco::DataType::FLOAT32>(size); // resize tensor
{
QuantizeDequantizeWeights(loco::DataType input, loco::DataType output,
QuantizationGranularity granularity)
- : input_type(input), output_type(output), granularity(granularity)
+ : input_type(input), output_type(output), granularity(granularity)
{
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(QuantizeDequantizeWeightsPassTest, name)
+{
+ luci::QuantizeDequantizeWeightsPass pass(loco::DataType::FLOAT32, loco::DataType::U8,
+ luci::QuantizationGranularity::LayerWise);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Service/Nodes/CircleConst.h>
#include <luci/Log.h>
#include <oops/UserExn.h>
#include <iostream>
#include <cmath>
+#include <functional>
+
+namespace
+{
+
+using namespace luci;
+using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
+
+void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc func)
+{
+ loco::TensorShape dimension;
+ dimension.rank(4);
+ uint32_t indices[4] = {
+ 0,
+ };
+
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ assert(false);
+ return;
+ }
+
+ for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+ {
+ for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+ {
+ for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
+ {
+ for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
+ {
+ func(indices, dimension, channel_dim_index);
+ }
+ }
+ }
+ }
+}
+
+} // namespace
namespace luci
{
namespace
{
+// Create a new const node from an existing node.
+// The new node has the following characteristics:
+// type: T
+// shape: same as 'node' (given as an argument)
+// buffer size: 'size' (given as an argument)
+// Note that the contents are not filled in by this function.
+template <loco::DataType T>
+luci::CircleConst *create_empty_const_from(luci::CircleConst *node, uint32_t size)
+{
+ auto new_node = node->graph()->nodes()->create<CircleConst>();
+ // TODO: We don't have any naming convention for quantized nodes yet.
+ // Fix this when we have one.
+ new_node->name(node->name());
+ new_node->dtype(T);
+ new_node->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ new_node->dim(i).set(node->dim(i).value());
+
+ new_node->size<T>(size);
+ new_node->shape_status(luci::ShapeStatus::VALID);
+
+ return new_node;
+}
+
void overwrite_quantparam(luci::CircleConcatenation *concat, luci::CircleNode *target)
{
auto concat_qparam = concat->quantparam();
auto quantparam = std::make_unique<CircleQuantParam>();
target->quantparam(std::move(quantparam));
target_qparam = target->quantparam();
+
+ if (target_qparam == nullptr)
+ throw std::runtime_error("Creating new quant param failed");
}
target_qparam->min = concat_qparam->min;
target_qparam->max = concat_qparam->max;
const_node->size<loco::DataType::S16>(size); // resize tensor
for (uint32_t i = 0; i < size; ++i)
const_node->at<loco::DataType::S16>(i) =
- std::min(32767, std::max(-32767, quantized_values[i]));
+ std::min(32767, std::max(-32767, quantized_values[i]));
break;
default:
throw std::runtime_error("Unsupported data type");
}
// Check if the node is the bias of Conv2D, DepthwiseConv2D, FullyConnected, or TransposeConv layer
-// If true, return <input, weight> pair of the successor node (used to quantize bias)
-// If flase, return <nullptr, nullptr>
-std::pair<loco::Node *, loco::Node *> get_input_weight_of_bias(CircleNode *node)
+// Returns a list of <input, weight, output> triples for the operators above.
+// Note that it returns a 'list' because a single bias can be used by multiple operators.
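+// For example (illustrative only): if the same bias const feeds one Conv2D and one
+// FullyConnected, the result would look like
+//   { {conv->input(), conv->filter(), conv}, {fc->input(), fc->weights(), fc} }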
+std::vector<std::vector<loco::Node *>> get_input_weight_output_of_bias(CircleNode *node)
{
+ std::vector<std::vector<loco::Node *>> result;
auto circle_const = dynamic_cast<CircleConst *>(node);
if (circle_const == nullptr)
- return std::make_pair(nullptr, nullptr);
+ return result;
auto succs = loco::succs(node);
- if (succs.size() != 1) // assume bias is used by only one node
- return std::make_pair(nullptr, nullptr);
for (auto out : succs)
{
{
assert(conv->input() != nullptr);
assert(conv->filter() != nullptr);
- return std::make_pair(conv->input(), conv->filter());
+ result.push_back({conv->input(), conv->filter(), conv});
+ continue;
}
auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
if (dw_conv != nullptr && dw_conv->bias() == circle_const)
{
assert(dw_conv->input() != nullptr);
assert(dw_conv->filter() != nullptr);
- return std::make_pair(dw_conv->input(), dw_conv->filter());
+ result.push_back({dw_conv->input(), dw_conv->filter(), dw_conv});
+ continue;
}
auto fc = dynamic_cast<CircleFullyConnected *>(out);
if (fc != nullptr && fc->bias() == circle_const)
{
assert(fc->input() != nullptr);
assert(fc->weights() != nullptr);
- return std::make_pair(fc->input(), fc->weights());
+ result.push_back({fc->input(), fc->weights(), fc});
+ continue;
}
auto tconv = dynamic_cast<CircleTransposeConv *>(out);
if (tconv != nullptr && tconv->bias() == circle_const)
{
assert(tconv->outBackprop() != nullptr);
assert(tconv->filter() != nullptr);
- return std::make_pair(tconv->outBackprop(), tconv->filter());
+ result.push_back({tconv->outBackprop(), tconv->filter(), tconv});
+ continue;
}
}
- return std::make_pair(nullptr, nullptr);
+ return result;
}
-void asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
- float *scaling_factor, int64_t *zp)
+CircleConst *asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
+ float *scaling_factor, int64_t *zp)
{
float scale = input_scale * weight_scale;
const float scaling_factor_inv = (scale == 0) ? 0 : 1.0 / scale;
for (uint32_t i = 0; i < size; ++i)
{
quantized_values[i] =
- static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+ static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
}
- node->dtype(loco::DataType::S32); // change the type of tensor
- node->size<loco::DataType::S32>(size); // resize tensor
+ auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size);
+
const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
for (uint32_t i = 0; i < size; ++i)
{
- node->at<loco::DataType::S32>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ new_bias->at<loco::DataType::S32>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
}
*scaling_factor = scale;
*zp = 0;
+
+ return new_bias;
}
-void quant_bias_per_channel(CircleConst *node, float input_scale, std::vector<float> &weight_scale,
- std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
+CircleConst *quant_bias_per_channel(CircleConst *node, float input_scale,
+ std::vector<float> &weight_scale,
+ std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
{
float scaling_factor_inv{0};
scaling_factor[i] = input_scale * weight_scale[i];
scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
quantized_values[i] =
- static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+ static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
zp[i] = 0;
}
- node->dtype(loco::DataType::S32); // change the type of tensor
- node->size<loco::DataType::S32>(size); // resize tensor
+ auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size);
+
const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
for (uint32_t i = 0; i < size; ++i)
{
- node->at<loco::DataType::S32>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ new_bias->at<loco::DataType::S32>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
}
+
+ return new_bias;
}
-void int16_quant_bias_per_channel(CircleConst *node, float input_scale,
- std::vector<float> &weight_scale,
- std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
+CircleConst *int16_quant_bias_per_channel(CircleConst *node, float input_scale,
+ std::vector<float> &weight_scale,
+ std::vector<float> &scaling_factor,
+ std::vector<int64_t> &zp)
{
float scaling_factor_inv{0};
scaling_factor[i] = input_scale * weight_scale[i];
scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
quantized_values[i] =
- static_cast<int64_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+ static_cast<int64_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
zp[i] = 0;
}
- node->dtype(loco::DataType::S64); // change the type of tensor
- node->size<loco::DataType::S64>(size); // resize tensor
+ auto new_bias = create_empty_const_from<loco::DataType::S64>(node, size);
+
for (uint32_t i = 0; i < size; ++i)
{
- node->at<loco::DataType::S64>(i) = quantized_values[i];
+ new_bias->at<loco::DataType::S64>(i) = quantized_values[i];
}
+
+ return new_bias;
}
bool has_min_max(const CircleNode *node)
uint32_t size = node->size<loco::DataType::FLOAT32>();
std::vector<int32_t> quantized_values(size);
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round(data * scaling_factor_inv));
};
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
-
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- quantized_values[cal_offset(dimension, indices)] =
- static_cast<int32_t>(std::round(data * scaling_factor_inv));
- }
- }
- }
- }
+ iterate_per_channel(node, channel_dim_index, quantize);
node->dtype(loco::DataType::S16); // change the type of tensor
node->size<loco::DataType::S16>(size); // resize tensor
for (uint32_t i = 0; i < size; ++i)
{
node->at<loco::DataType::S16>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
}
}
uint32_t size = node->size<loco::DataType::FLOAT32>();
std::vector<int32_t> quantized_values(size);
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
};
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
-
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- quantized_values[cal_offset(dimension, indices)] =
- static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
- }
- }
- }
- }
+ iterate_per_channel(node, channel_dim_index, quantize);
node->dtype(loco::DataType::U8); // change the type of tensor
node->size<loco::DataType::U8>(size); // resize tensor
}
}
+void set_bias(luci::CircleNode *node, luci::CircleConst *bias)
+{
+ if (auto conv = dynamic_cast<CircleConv2D *>(node))
+ conv->bias(bias);
+ else if (auto dconv = dynamic_cast<CircleDepthwiseConv2D *>(node))
+ dconv->bias(bias);
+ else if (auto tconv = dynamic_cast<CircleTransposeConv *>(node))
+ tconv->bias(bias);
+ else if (auto fc = dynamic_cast<CircleFullyConnected *>(node))
+ fc->bias(bias);
+ else
+ throw std::runtime_error("Only convolution, depthwise convolution, transposed convolution, and "
+                             "fully-connected layers have bias");
+}
+
/**
* @brief QuantizeActivation quantizes tensors for activations
* @details Quantize using recorded min/max values
struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<bool>
{
QuantizeActivation(loco::DataType input, loco::DataType output)
- : input_type(input), output_type(output)
+ : input_type(input), output_type(output)
{
}
continue;
// Check if this is bias (bias is quantized later)
- auto iw = get_input_weight_of_bias(circle_node);
- if (iw.first != nullptr && iw.second != nullptr)
+ auto iwo = get_input_weight_output_of_bias(circle_node);
+ if (iwo.size() > 0)
+ continue;
+
+ // Check if this is bool type (bool type is not quantized)
+ if (circle_node->dtype() == loco::DataType::BOOL)
continue;
// Check if this is activation
struct QuantizeBias final : public luci::CircleNodeMutableVisitor<bool>
{
QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
- : input_type(input), output_type(output), granularity(gr)
+ : input_type(input), output_type(output), granularity(gr)
{
}
if (is_quantized(node))
return false;
- // Check if this is bias
- auto iw = get_input_weight_of_bias(node);
- if (iw.first == nullptr || iw.second == nullptr)
- return false;
-
- auto input = loco::must_cast<luci::CircleNode *>(iw.first);
- auto weight = loco::must_cast<luci::CircleNode *>(iw.second);
+ auto iwo_list = get_input_weight_output_of_bias(node);
- if (granularity == QuantizationGranularity::ChannelWise)
+ for (auto iwo : iwo_list)
{
- assert(input->quantparam()->scale.size() == 1); // input scale's layer-wise
- auto input_scale = input->quantparam()->scale[0];
+ assert(iwo.size() == 3);
- assert(weight->quantparam() != nullptr); // weight scale's channel-wise
- auto weight_scale = weight->quantparam()->scale;
+ auto input = loco::must_cast<luci::CircleNode *>(iwo[0]);
+ auto weight = loco::must_cast<luci::CircleNode *>(iwo[1]);
+ auto output = loco::must_cast<luci::CircleNode *>(iwo[2]);
- auto circle_const = loco::must_cast<luci::CircleConst *>(node);
+ auto const_bias = loco::must_cast<luci::CircleConst *>(node);
+ assert(const_bias->dtype() == loco::DataType::FLOAT32);
- uint32_t size = circle_const->size<loco::DataType::FLOAT32>();
- assert(size == weight_scale.size());
- std::vector<float> scaling_factor(size);
- std::vector<int64_t> zp(size);
+ CircleConst *new_bias = nullptr;
- if (output_type == loco::DataType::U8)
- {
- quant_bias_per_channel(circle_const, input_scale, weight_scale, scaling_factor, zp);
- }
- else if (output_type == loco::DataType::S16)
+ if (granularity == QuantizationGranularity::ChannelWise)
{
- int16_quant_bias_per_channel(circle_const, input_scale, weight_scale, scaling_factor, zp);
+        assert(input->quantparam()->scale.size() == 1); // input scale is layer-wise
+ auto input_scale = input->quantparam()->scale[0];
+
+        assert(weight->quantparam() != nullptr); // weight scale is channel-wise
+ auto weight_scale = weight->quantparam()->scale;
+
+ uint32_t size = const_bias->size<loco::DataType::FLOAT32>();
+ assert(size == weight_scale.size());
+ std::vector<float> scaling_factor(size);
+ std::vector<int64_t> zp(size);
+
+ if (output_type == loco::DataType::U8)
+ {
+ new_bias =
+ quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
+ }
+ else if (output_type == loco::DataType::S16)
+ {
+ new_bias =
+ int16_quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported quantization type.");
+ }
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale = scaling_factor;
+ quantparam->zerop = zp;
+ assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
+ new_bias->quantparam(std::move(quantparam));
+
+ set_bias(output, new_bias);
}
else
{
- throw std::runtime_error("Unsupported quantization type.");
- }
+ assert(input->quantparam()->scale.size() == 1); // Only support per-layer quant
+ auto input_scale = input->quantparam()->scale[0];
- auto quantparam = std::make_unique<CircleQuantParam>();
- quantparam->scale = scaling_factor;
- quantparam->zerop = zp;
- assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
- circle_const->quantparam(std::move(quantparam));
- }
- else
- {
- assert(input->quantparam()->scale.size() == 1); // Only support per-layer quant
- auto input_scale = input->quantparam()->scale[0];
-
- assert(weight->quantparam()->scale.size() == 1); // Only support per-layer quant
- auto weight_scale = weight->quantparam()->scale[0];
-
- auto circle_const = loco::must_cast<luci::CircleConst *>(node);
- float scaling_factor{0};
- int64_t zp{0};
- asym_quant_bias_per_layer(circle_const, input_scale, weight_scale, &scaling_factor, &zp);
- auto quantparam = std::make_unique<CircleQuantParam>();
- quantparam->scale.push_back(scaling_factor);
- quantparam->zerop.push_back(zp);
- assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
- circle_const->quantparam(std::move(quantparam));
+ assert(weight->quantparam()->scale.size() == 1); // Only support per-layer quant
+ auto weight_scale = weight->quantparam()->scale[0];
+
+ float scaling_factor{0};
+ int64_t zp{0};
+ new_bias =
+ asym_quant_bias_per_layer(const_bias, input_scale, weight_scale, &scaling_factor, &zp);
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(scaling_factor);
+ quantparam->zerop.push_back(zp);
+ assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
+ new_bias->quantparam(std::move(quantparam));
+
+ set_bias(output, new_bias);
+ }
}
return false;
}
struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
{
QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
- : input_type(input), output_type(output), granularity(gr)
+ : input_type(input), output_type(output), granularity(gr)
{
}
loco::DataType output_type;
QuantizationGranularity granularity;
- // Quantize input tensors of each node
- bool visit(luci::CircleNode *node)
+private:
+ void quantize_weights(luci::CircleConst *weights)
{
- LOGGER(l);
- INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
- auto arity = node->arity();
- for (uint32_t i = 0; i < arity; i++)
+    // Find min/max per channel
+ if (granularity == QuantizationGranularity::ChannelWise)
{
- auto input_node = node->arg(i);
- auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+ auto quantparam = weights->quantparam();
+ if (quantparam == nullptr)
+ {
+ assert(false && "quantparam is nullptr");
+ return;
+ }
- // Check if this is already quantized
- if (is_quantized(circle_node))
- continue;
+ auto min = quantparam->min;
+ auto scaling_factor = quantparam->scale;
+ int32_t channel_dim_index = 0;
- if (is_weights(circle_node))
+ if (output_type == loco::DataType::U8)
{
- auto circle_const = loco::must_cast<luci::CircleConst *>(circle_node);
-
- // Find min/max per channel-wise
- if (granularity == QuantizationGranularity::ChannelWise)
- {
- auto quantparam = circle_node->quantparam();
- if (quantparam == nullptr)
- {
- assert(false && "quantparam is nullptr");
- return false;
- }
-
- auto min = quantparam->min;
- auto scaling_factor = quantparam->scale;
- int32_t channel_dim_index = 0;
-
- if (output_type == loco::DataType::U8)
- {
- asym_wquant_per_channel(circle_const, min, scaling_factor, channel_dim_index);
- }
- else
- {
- sym_wquant_per_channel(circle_const, scaling_factor, channel_dim_index);
- }
- quantparam->min.clear();
- quantparam->max.clear();
- quantparam->quantized_dimension = channel_dim_index;
- }
- // Find min/max per layer-wise
- else
- {
- // Quantize using recorded quantparam
- auto quantparam = circle_node->quantparam();
- assert(quantparam != nullptr);
- assert(quantparam->min.size() == 1); // only support layer-wise quant
- assert(quantparam->scale.size() == 1); // only support layer-wise quant
- auto min = quantparam->min[0];
- auto scaling_factor = quantparam->scale[0];
- asym_wquant_per_layer(circle_const, min, scaling_factor);
- quantparam->min.clear();
- quantparam->max.clear();
- }
+ asym_wquant_per_channel(weights, min, scaling_factor, channel_dim_index);
+ }
+ else
+ {
+ sym_wquant_per_channel(weights, scaling_factor, channel_dim_index);
}
+ quantparam->min.clear();
+ quantparam->max.clear();
+ quantparam->quantized_dimension = channel_dim_index;
+ }
+    // Find min/max per layer
+ else
+ {
+ // Quantize using recorded quantparam
+ auto quantparam = weights->quantparam();
+ assert(quantparam != nullptr);
+ assert(quantparam->min.size() == 1); // only support layer-wise quant
+ assert(quantparam->scale.size() == 1); // only support layer-wise quant
+ auto min = quantparam->min[0];
+ auto scaling_factor = quantparam->scale[0];
+ asym_wquant_per_layer(weights, min, scaling_factor);
+ quantparam->min.clear();
+ quantparam->max.clear();
}
- return false;
}
-};
-void quant_instnorm(luci::CircleInstanceNorm *node, loco::DataType output_type,
- QuantizationGranularity granularity)
-{
- auto gamma = loco::must_cast<luci::CircleConst *>(node->gamma());
- auto beta = loco::must_cast<luci::CircleConst *>(node->beta());
- assert(gamma->dtype() == loco::DataType::FLOAT32);
- assert(beta->dtype() == loco::DataType::FLOAT32);
+ bool visit(luci::CircleConv2D *node)
+ {
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
- if (granularity == QuantizationGranularity::LayerWise)
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ quantize_weights(new_weights);
+ return true;
+ }
+ return false;
+ }
+
+ bool visit(luci::CircleDepthwiseConv2D *node)
{
- quant_const(gamma, output_type);
- quant_const(beta, output_type);
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ quantize_weights(new_weights);
+ return true;
+ }
+ return false;
}
- else if (granularity == QuantizationGranularity::ChannelWise)
+
+ bool visit(luci::CircleInstanceNorm *node)
{
- quant_const_per_channel(gamma, output_type);
- quant_const_per_channel(beta, output_type);
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
+
+ auto gamma = loco::must_cast<luci::CircleConst *>(node->gamma());
+ auto beta = loco::must_cast<luci::CircleConst *>(node->beta());
+
+ bool changed = false;
+ if (!is_quantized(gamma))
+ {
+ assert(gamma->dtype() == loco::DataType::FLOAT32);
+ auto new_gamma = luci::clone(gamma);
+ if (granularity == QuantizationGranularity::LayerWise)
+ quant_const(new_gamma, output_type);
+ else if (granularity == QuantizationGranularity::ChannelWise)
+ quant_const_per_channel(new_gamma, output_type);
+ node->gamma(new_gamma);
+ changed = true;
+ }
+ if (!is_quantized(beta))
+ {
+ assert(beta->dtype() == loco::DataType::FLOAT32);
+ auto new_beta = luci::clone(beta);
+ if (granularity == QuantizationGranularity::LayerWise)
+ quant_const(new_beta, output_type);
+ else if (granularity == QuantizationGranularity::ChannelWise)
+ quant_const_per_channel(new_beta, output_type);
+ node->beta(new_beta);
+ changed = true;
+ }
+
+ return changed;
}
- else
- throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'");
-}
-void quant_prelu(luci::CirclePRelu *node, loco::DataType output_type,
- QuantizationGranularity granularity)
-{
- auto alpha = loco::must_cast<luci::CircleConst *>(node->alpha());
- assert(alpha->dtype() == loco::DataType::FLOAT32);
+ bool visit(luci::CirclePRelu *node)
+ {
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
+
+ auto alpha = loco::must_cast<luci::CircleConst *>(node->alpha());
+
+ if (!is_quantized(alpha))
+ {
+ assert(alpha->dtype() == loco::DataType::FLOAT32);
+ auto new_alpha = luci::clone(alpha);
+ if (granularity == QuantizationGranularity::LayerWise)
+ quant_const(new_alpha, output_type);
+ else if (granularity == QuantizationGranularity::ChannelWise)
+ quant_const_per_channel(new_alpha, output_type);
+ node->alpha(new_alpha);
+ return true;
+ }
- if (granularity == QuantizationGranularity::LayerWise)
+ return false;
+ }
+
+ bool visit(luci::CircleTransposeConv *node)
{
- quant_const(alpha, output_type);
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ quantize_weights(new_weights);
+ return true;
+ }
+ return false;
}
- else if (granularity == QuantizationGranularity::ChannelWise)
+
+ bool visit(luci::CircleFullyConnected *node)
{
- quant_const_per_channel(alpha, output_type);
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->weights());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->weights(new_weights);
+ quantize_weights(new_weights);
+ return true;
+ }
+ return false;
}
- else
- throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'");
-}
+
+ bool visit(luci::CircleNode *) { return false; }
+};
/**
* @brief Quantize const input tensors using min/max of const values
*/
-void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type,
- QuantizationGranularity granularity)
+void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type)
{
auto opcode = node->opcode();
auto arity = node->arity();
case luci::CircleOpcode::CONV_2D:
case luci::CircleOpcode::DEPTHWISE_CONV_2D:
case luci::CircleOpcode::FULLY_CONNECTED:
+ case luci::CircleOpcode::INSTANCE_NORM:
+ case luci::CircleOpcode::PRELU:
case luci::CircleOpcode::TRANSPOSE_CONV:
// Handled in QuantizeWeights and QuantizeBias
break;
// Handled in propagate_concat_quantparam
break;
+ case luci::CircleOpcode::LOGICAL_OR:
+ // Inputs of logical Ops are bool, thus not quantized
+ break;
+
case luci::CircleOpcode::ARG_MAX:
case luci::CircleOpcode::ARG_MIN:
+ case luci::CircleOpcode::BATCH_TO_SPACE_ND:
case luci::CircleOpcode::MEAN:
case luci::CircleOpcode::PAD:
case luci::CircleOpcode::REDUCE_ANY:
case luci::CircleOpcode::RESIZE_BILINEAR:
case luci::CircleOpcode::RESIZE_NEAREST_NEIGHBOR:
case luci::CircleOpcode::REVERSE_SEQUENCE:
+ case luci::CircleOpcode::SLICE:
+ case luci::CircleOpcode::SPACE_TO_BATCH_ND:
+ case luci::CircleOpcode::STRIDED_SLICE:
case luci::CircleOpcode::SUM:
case luci::CircleOpcode::TILE:
case luci::CircleOpcode::TOPK_V2:
// Ex: axis, paddings
input_node = node->arg(0);
const_node = dynamic_cast<luci::CircleConst *>(input_node);
- if (const_node != nullptr)
+ if (const_node != nullptr && !is_quantized(const_node))
quant_const(const_node, output_type);
break;
- case luci::CircleOpcode::INSTANCE_NORM:
- quant_instnorm(loco::must_cast<luci::CircleInstanceNorm *>(node), output_type, granularity);
- break;
-
- case luci::CircleOpcode::PRELU:
- quant_prelu(loco::must_cast<luci::CirclePRelu *>(node), output_type, granularity);
- break;
-
case luci::CircleOpcode::ADD:
case luci::CircleOpcode::ADD_N:
+ case luci::CircleOpcode::DEPTH_TO_SPACE:
case luci::CircleOpcode::DIV:
+ case luci::CircleOpcode::ELU:
case luci::CircleOpcode::EQUAL:
+ case luci::CircleOpcode::FLOOR:
+ case luci::CircleOpcode::FLOOR_DIV:
case luci::CircleOpcode::GREATER:
case luci::CircleOpcode::GREATER_EQUAL:
case luci::CircleOpcode::LESS:
case luci::CircleOpcode::LESS_EQUAL:
+ case luci::CircleOpcode::LOGISTIC:
case luci::CircleOpcode::MAXIMUM:
case luci::CircleOpcode::MINIMUM:
case luci::CircleOpcode::MUL:
case luci::CircleOpcode::NOT_EQUAL:
+ case luci::CircleOpcode::POW:
+ case luci::CircleOpcode::RSQRT:
+ case luci::CircleOpcode::SOFTMAX:
+ case luci::CircleOpcode::SPACE_TO_DEPTH:
+ case luci::CircleOpcode::SQRT:
case luci::CircleOpcode::SUB:
+ case luci::CircleOpcode::TANH:
// Quantize all const inputs using their values
for (uint32_t i = 0; i < arity; i++)
{
input_node = node->arg(i);
const_node = dynamic_cast<luci::CircleConst *>(input_node);
- if (const_node != nullptr)
+ if (const_node != nullptr && !is_quantized(const_node))
quant_const(const_node, output_type);
}
break;
+ case luci::CircleOpcode::SPLIT:
+ // Only the second input is quantized
+ // First input should not be quantized (e.g., split_dim)
+ input_node = node->arg(1);
+ const_node = dynamic_cast<luci::CircleConst *>(input_node);
+ if (const_node != nullptr && !is_quantized(const_node))
+ quant_const(const_node, output_type);
+ break;
+
default:
for (uint32_t i = 0; i < arity; i++)
{
* (U8 qparam2)
*
* AFTER
- * [CircleNode] [CircleConst]
- * (U8 qparam2) (U8 qparam2)
+ * [CircleNode] [CircleConst] [CircleConst] <- Dead node
+ * (U8 qparam2) (U8 qparam2) (FP32)
* \ /
* \ /
* [CircleConcatenation]
auto node = concat->arg(i);
auto const_node = dynamic_cast<luci::CircleConst *>(node);
if (const_node != nullptr)
- quant_const(const_node, quant_type);
+ {
+ auto new_const = luci::clone(const_node);
+ quant_const(new_const, quant_type);
+ concat->values(i, new_const);
+ }
}
return;
}
if (node->opcode() == luci::CircleOpcode::CONCATENATION)
continue;
- // Skip if this input is used by other Ops
- auto succs = loco::succs(node);
- if (succs.size() != 1)
- {
- if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
- {
- luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
- quant_const(const_node, quant_type);
- }
- continue;
- }
-
- assert(succs.find(concat) != succs.end());
-
// Quantize constant values
if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
{
const auto scaling_factor = concat_qparam->scale[0];
const auto zerop = concat_qparam->zerop[0];
- quant_const_values(const_node, scaling_factor, zerop, quant_type);
+ auto new_const = luci::clone(const_node);
+ quant_const_values(new_const, scaling_factor, zerop, quant_type);
+ concat->values(i, new_const);
+ overwrite_quantparam(concat, new_const);
}
else
{
+ const auto succs = loco::succs(node);
+ if (succs.size() > 1)
+ continue;
+
// Non-const input must have been quantized
assert(node->quantparam() != nullptr);
+ overwrite_quantparam(concat, node);
}
-
- overwrite_quantparam(concat, node);
}
}
circle_node->accept(&qb);
}
- // Quantize const inputs other than weights and bias
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
- {
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- quantize_const_inputs(circle_node, _output_dtype, _granularity);
- }
-
// Propagate quantization parameters of concat Op
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
propagate_concat_quantparam(concat, _output_dtype);
}
+ // Quantize const inputs other than weights and bias
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ quantize_const_inputs(circle_node, _output_dtype);
+ }
+
// Update output dtype
auto graph_outputs = g->outputs();
for (auto node : loco::output_nodes(g))
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizeWithMinMaxPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(QuantizeWithMinMaxPassTest, name)
+{
+ luci::QuantizeWithMinMaxPass pass(loco::DataType::FLOAT32, loco::DataType::U8,
+ luci::QuantizationGranularity::LayerWise);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizedModelVerifier.h"
+
+#include "VerifyQuantizedNodeLayerWiseGranularity.h"
+#include "VerifyQuantizedNodeChannelWiseGranularity.h"
+#include "VerifyQuantizedNodeU8Type.h"
+#include "VerifyQuantizedNodeS16Type.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+void QuantizedModelVerifier::verify(loco::Graph *g)
+{
+ if (_quantized_dtype != Type::U8 && _quantized_dtype != Type::S16)
+ throw std::runtime_error("Unsupported quantized dtype");
+
+ if (_granularity != Granularity::ChannelWise && _granularity != Granularity::LayerWise)
+ throw std::runtime_error("Unsupported granularity");
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+
+ // Verify Type
+ if (_quantized_dtype == Type::U8)
+ {
+ VerifyQuantizedNodeU8Type vt;
+ if (!circle_node->accept(&vt))
+ throw std::runtime_error("Wrong data type");
+ }
+ else if (_quantized_dtype == Type::S16)
+ {
+ VerifyQuantizedNodeS16Type vt;
+ if (!circle_node->accept(&vt))
+ throw std::runtime_error("Wrong data type");
+ }
+
+ // Verify Granularity
+ if (_granularity == Granularity::LayerWise)
+ {
+ VerifyQuantizedNodeLayerWiseGranularity vg;
+ if (!circle_node->accept(&vg))
+ throw std::runtime_error("Wrong granularity");
+ }
+ else if (_granularity == Granularity::ChannelWise)
+ {
+ VerifyQuantizedNodeChannelWiseGranularity vg;
+ if (!circle_node->accept(&vg))
+ throw std::runtime_error("Wrong granularity");
+ }
+ }
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZED_MODEL_VERIFIER_H__
+#define __LUCI_QUANTIZED_MODEL_VERIFIER_H__
+
+#include "luci/Pass/QuantizationParameters.h"
+
+#include <loco.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to verify a quantized model
+ *
+ * TODO Move this to luci/service
+ */
+struct QuantizedModelVerifier
+{
+
+public:
+ QuantizedModelVerifier(loco::DataType quantized_dtype, QuantizationGranularity granularity)
+ : _quantized_dtype(quantized_dtype), _granularity(granularity)
+ {
+ }
+
+ void verify(loco::Graph *g);
+
+private:
+ loco::DataType _quantized_dtype;
+ QuantizationGranularity _granularity;
+};
+
+} // namespace luci
+
+#endif // __LUCI_QUANTIZED_MODEL_VERIFIER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizedModelVerifier.h"
+
+#include "luci/Pass/QuantizeWithMinMaxPass.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+using Type = loco::DataType;
+using Granularity = luci::QuantizationGranularity;
+
+namespace
+{
+
+/**
+ * @brief A helper function to create a dummy const node
+ */
+template <Type T> luci::CircleConst *create_dummy_const(loco::Graph *g, luci::test::ShapeU32 shape)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ {
+ node->dtype(T);
+ node->shape(shape);
+ node->size<T>(luci::test::num_elements(shape));
+
+ for (int32_t i = 0; i < luci::test::num_elements(shape); i++)
+ {
+ // DESIGN NOTE
+ //
+      // Filling with any random numbers is fine
+      // Q. Should it include negative numbers?
+ switch (T)
+ {
+ case Type::FLOAT32:
+ // Fill with index
+ node->at<T>(i) = static_cast<float>(i);
+ break;
+ case Type::BOOL:
+ // Fill by flip
+ node->at<T>(i) = (i % 2) ? true : false;
+ break;
+ case Type::U8:
+ // Fill with index
+ node->at<T>(i) = static_cast<uint8_t>(i);
+ break;
+ case Type::S16:
+ // Fill with index
+ node->at<T>(i) = static_cast<int16_t>(i);
+ break;
+ }
+ }
+ }
+
+ return node;
+}
+
+/**
+ * @brief A helper function to create a const node with given values
+ */
+template <Type DT, typename T>
+luci::CircleConst *create_const(loco::Graph *g, luci::test::ShapeU32 shape,
+ std::initializer_list<T> values)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ {
+ node->dtype(DT);
+ node->shape(shape);
+ node->size<DT>(luci::test::num_elements(shape));
+
+ assert(values.size() == node->size<DT>());
+
+ uint32_t index = 0;
+ for (auto val : values)
+ {
+ node->at<DT>(index++) = static_cast<T>(val);
+ }
+ }
+
+ return node;
+}
+
+void insert_scale_zp(luci::CircleNode *node, float scale, int64_t zp)
+{
+ auto qparam = node->quantparam();
+ assert(qparam != nullptr); // FIX_CALLER_UNLESS
+ qparam->scale.push_back(scale);
+ qparam->zerop.push_back(zp);
+}
+
+void quantize_and_verify(loco::Graph *g, Type quantized_dtype, Granularity granularity)
+{
+ luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
+ pass.run(g);
+
+ luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
+ verifier.verify(g);
+}
+
+// Helper function to reduce duplicate test code
+// Assumption: g->output()->from() is the target node
+void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quantized_dtype,
+ Granularity granularity, Type wrong_dtype)
+{
+ luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
+ pass.run(g->g());
+
+ auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
+ node->dtype(wrong_dtype);
+
+ luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
+ verifier.verify(g->g());
+}
+
+// Helper function to reduce duplicate test code
+// Assumption: g->output()->from() is the target node
+void quantize_and_verify_with_wrong_granularity(luci::test::TestIOGraph *g, Type quantized_dtype,
+ Granularity granularity)
+{
+ luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
+ pass.run(g->g());
+
+ auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
+ insert_scale_zp(node, 1.0, 1);
+
+ luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
+ verifier.verify(g->g());
+}
+
+// Helper function to reduce duplicate test code
+void quantize_and_verify_with_wrong_granularity(luci::test::TestIOGraph *g, Type quantized_dtype,
+ Granularity granularity, luci::CircleNode *target)
+{
+ luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
+ pass.run(g->g());
+
+ insert_scale_zp(target, 1.0, 1);
+
+ luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
+ verifier.verify(g->g());
+}
+
+// Set min/max for all non-const nodes in the graph
+void set_minmax_to_non_const(loco::Graph *g, float min, float max)
+{
+ for (auto node : loco::all_nodes(g))
+ {
+ auto const_node = dynamic_cast<luci::CircleConst *>(node);
+ if (const_node != nullptr)
+ continue;
+
+ // Min/Max is not recorded for ArgMax
+ // See MinMaxObserver.cpp in record_minmax module
+ auto argmax_node = dynamic_cast<luci::CircleArgMax *>(node);
+ if (argmax_node != nullptr)
+ continue;
+
+ // Min/Max is not recorded for Split
+ // See MinMaxObserver.cpp in record_minmax module
+ auto split_node = dynamic_cast<luci::CircleSplit *>(node);
+ if (split_node != nullptr)
+ continue;
+
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ {
+ qparam->min.emplace_back(min);
+ qparam->max.emplace_back(max);
+ }
+ circle_node->quantparam(std::move(qparam));
+ }
+}
+
+/**
+ * @brief Simple Test Graph
+ * @note
+ * The simple test graph's nodes are initialized with
+ * simple shapes and values.
+ */
+class SimpleTestGraph : public luci::test::TestIOGraph
+{
+public:
+ virtual void init(void) = 0;
+};
+
+class InstanceNormTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _gamma = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _beta = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _instnorm = g()->nodes()->create<luci::CircleInstanceNorm>();
+ {
+ _instnorm->input(input());
+ _instnorm->gamma(_gamma);
+ _instnorm->beta(_beta);
+ }
+ output()->from(_instnorm);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ loco::Node *gamma(void) const { return _instnorm->gamma(); }
+ loco::Node *beta(void) const { return _instnorm->beta(); }
+
+public:
+ luci::CircleInstanceNorm *_instnorm = nullptr;
+ luci::CircleConst *_input = nullptr;
+ luci::CircleConst *_gamma = nullptr;
+ luci::CircleConst *_beta = nullptr;
+};
+
+class LogisticTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _logistic = g()->nodes()->create<luci::CircleLogistic>();
+ {
+ _logistic->x(input());
+ }
+ output()->from(_logistic);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleLogistic *_logistic = nullptr;
+};
+
+class SoftmaxTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _softmax = g()->nodes()->create<luci::CircleSoftmax>();
+ {
+ _softmax->logits(input());
+ _softmax->beta(0.1);
+ }
+ output()->from(_softmax);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleSoftmax *_softmax = nullptr;
+};
+
+class SpaceToBatchNDTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({1, 2, 2, 1}, {4, 1, 1, 1});
+ _block_shape = create_dummy_const<Type::S32>(g(), {2});
+ for (uint32_t i = 0; i < 2; i++)
+ _block_shape->at<Type::S32>(i) = 2;
+
+ _paddings = create_dummy_const<Type::S32>(g(), {2, 2});
+ for (uint32_t i = 0; i < 4; i++)
+ _paddings->at<Type::S32>(i) = 0;
+
+ _stob = g()->nodes()->create<luci::CircleSpaceToBatchND>();
+ {
+ _stob->input(input());
+ _stob->block_shape(_block_shape);
+ _stob->paddings(_paddings);
+ }
+ output()->from(_stob);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleSpaceToBatchND *_stob = nullptr;
+ luci::CircleConst *_block_shape = nullptr;
+ luci::CircleConst *_paddings = nullptr;
+};
+
+class SpaceToDepthTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({1, 2, 2, 1}, {1, 1, 1, 4});
+ _stod = g()->nodes()->create<luci::CircleSpaceToDepth>();
+ {
+ _stod->input(input());
+ _stod->block_size(2);
+ }
+ output()->from(_stod);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleSpaceToDepth *_stod = nullptr;
+};
+
+template <Type indexT> class SliceTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _begin = g()->nodes()->create<luci::CircleConst>();
+ {
+ _begin->dtype(indexT);
+ }
+ _size = g()->nodes()->create<luci::CircleConst>();
+ {
+ _size->dtype(indexT);
+ }
+ _slice = g()->nodes()->create<luci::CircleSlice>();
+ {
+ _slice->input(input());
+ _slice->begin(_begin);
+ _slice->size(_size);
+ }
+ output()->from(_slice);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleSlice *_slice = nullptr;
+ luci::CircleConst *_begin = nullptr;
+ luci::CircleConst *_size = nullptr;
+};
+
+class SplitTestGraph final : public luci::test::TestIOGraph
+{
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1, 32}, {32});
+ _split_dim = create_dummy_const<Type::S32>(g(), {1});
+ _split = g()->nodes()->create<luci::CircleSplit>();
+ {
+ _split->input(input());
+ _split->split_dim(_split_dim);
+ }
+ _split_o1 = g()->nodes()->create<luci::CircleSplitOut>();
+ {
+ _split_o1->input(_split);
+ _split_o1->index(0);
+ }
+
+ output()->from(_split_o1);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleSplit *_split = nullptr;
+ luci::CircleSplitOut *_split_o1 = nullptr;
+ luci::CircleConst *_split_dim = nullptr;
+};
+
+class StridedSliceTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _begin = g()->nodes()->create<luci::CircleConst>();
+ {
+ _begin->dtype(Type::S32);
+ }
+ _end = g()->nodes()->create<luci::CircleConst>();
+ {
+ _end->dtype(Type::S32);
+ }
+ _strides = g()->nodes()->create<luci::CircleConst>();
+ {
+ _strides->dtype(Type::S32);
+ }
+ _slice = g()->nodes()->create<luci::CircleStridedSlice>();
+ {
+ _slice->input(input());
+ _slice->begin(_begin);
+ _slice->end(_end);
+ _slice->strides(_strides);
+ }
+ output()->from(_slice);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleStridedSlice *_slice = nullptr;
+ luci::CircleConst *_begin = nullptr;
+ luci::CircleConst *_end = nullptr;
+ luci::CircleConst *_strides = nullptr;
+};
+
+class ReshapeTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _shape = g()->nodes()->create<luci::CircleConst>();
+ {
+ _shape->dtype(Type::S32);
+ }
+ _reshape = g()->nodes()->create<luci::CircleReshape>();
+ {
+ _reshape->tensor(input());
+ _reshape->shape(_shape);
+ }
+ output()->from(_reshape);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleReshape *_reshape = nullptr;
+ luci::CircleConst *_shape = nullptr;
+};
+
+class TanhTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _tanh = g()->nodes()->create<luci::CircleTanh>();
+ {
+ _tanh->x(input());
+ }
+ output()->from(_tanh);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleTanh *_tanh = nullptr;
+};
+
+class FloorTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _floor = g()->nodes()->create<luci::CircleFloor>();
+ {
+ _floor->x(input());
+ }
+ output()->from(_floor);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleFloor *_floor = nullptr;
+};
+
+template <Type indexT> class ArgMaxTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {1});
+ // output dtype is float by default, but ArgMax should have indexType (s32/s64)
+ output()->dtype(indexT);
+ _dimension = g()->nodes()->create<luci::CircleConst>();
+ {
+ _dimension->dtype(indexT);
+ }
+ _argmax = g()->nodes()->create<luci::CircleArgMax>();
+ {
+ _argmax->input(input());
+ _argmax->dimension(_dimension);
+ _argmax->output_type(indexT);
+ _argmax->dtype(indexT);
+ }
+ output()->from(_argmax);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleArgMax *_argmax = nullptr;
+ luci::CircleConst *_dimension = nullptr;
+};
+
+class BatchToSpaceNDTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _block_shape = g()->nodes()->create<luci::CircleConst>();
+ {
+ _block_shape->dtype(Type::S32);
+ }
+ _crops = g()->nodes()->create<luci::CircleConst>();
+ {
+ _crops->dtype(Type::S32);
+ }
+ _btos = g()->nodes()->create<luci::CircleBatchToSpaceND>();
+ {
+ _btos->input(input());
+ _btos->block_shape(_block_shape);
+ _btos->crops(_crops);
+ }
+ output()->from(_btos);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleBatchToSpaceND *_btos = nullptr;
+ luci::CircleConst *_block_shape = nullptr;
+ luci::CircleConst *_crops = nullptr;
+};
+
+class DepthToSpaceTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({1, 1, 1, 4}, {1, 2, 2, 1});
+ _dtos = g()->nodes()->create<luci::CircleDepthToSpace>();
+ {
+ _dtos->input(input());
+ _dtos->block_size(2);
+ }
+ output()->from(_dtos);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleDepthToSpace *_dtos = nullptr;
+};
+
+class PadTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _paddings = g()->nodes()->create<luci::CircleConst>();
+ {
+ _paddings->dtype(Type::S32);
+ }
+ _pad = g()->nodes()->create<luci::CirclePad>();
+ {
+ _pad->input(input());
+ _pad->paddings(_paddings);
+ }
+ output()->from(_pad);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CirclePad *_pad = nullptr;
+ luci::CircleConst *_paddings = nullptr;
+};
+
+class TransposeTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _perm = g()->nodes()->create<luci::CircleConst>();
+ {
+ _perm->dtype(Type::S32);
+ }
+ _transpose = g()->nodes()->create<luci::CircleTranspose>();
+ {
+ _transpose->a(input());
+ _transpose->perm(_perm);
+ }
+ output()->from(_transpose);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleTranspose *_transpose = nullptr;
+ luci::CircleConst *_perm = nullptr;
+};
+
+class ConcatenationTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({16}, {32});
+ _param = create_dummy_const<Type::FLOAT32>(g(), {16});
+ _concat = g()->nodes()->create<luci::CircleConcatenation>(2);
+ {
+ _concat->values(0, input());
+ _concat->values(1, _param);
+ _concat->axis(0);
+ }
+ output()->from(_concat);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleConcatenation *_concat = nullptr;
+ luci::CircleConst *_param = nullptr;
+};
+
+// Test graph for comparison Ops
+// GREATER, GREATER_EQUAL, LESS, LESS_EQUAL, EQUAL, NOT_EQUAL
+template <class Op> class ComparisonOpTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ output()->dtype(loco::DataType::BOOL);
+ _y = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _op = g()->nodes()->create<Op>();
+ {
+ _op->x(input());
+ _op->y(_y);
+ _op->dtype(loco::DataType::BOOL);
+ }
+ output()->from(_op);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x(void) const { return _op->x(); }
+ loco::Node *y(void) const { return _op->y(); }
+
+public:
+ Op *_op = nullptr;
+ luci::CircleConst *_y = nullptr;
+};
+
+// Test graph for binary logical Ops
+// LOGICAL_OR, LOGICAL_AND
+template <class Op> class BinaryLogicalOpTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ input()->dtype(loco::DataType::BOOL);
+ output()->dtype(loco::DataType::BOOL);
+ _y = create_dummy_const<Type::BOOL>(g(), {32});
+ _op = g()->nodes()->create<Op>();
+ {
+ _op->x(input());
+ _op->y(_y);
+ _op->dtype(loco::DataType::BOOL);
+ }
+ output()->from(_op);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x(void) const { return _op->x(); }
+ loco::Node *y(void) const { return _op->y(); }
+
+public:
+ Op *_op = nullptr;
+ luci::CircleConst *_y = nullptr;
+};
+
+class DivTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+
+ _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _div = g()->nodes()->create<luci::CircleDiv>();
+ {
+ _div->x(input());
+ _div->y(_const);
+ }
+ output()->from(_div);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x() { return _div->x(); }
+
+ loco::Node *y() { return _div->y(); }
+
+private:
+ luci::CircleDiv *_div = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+class FloorDivTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+
+ _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _floor_div = g()->nodes()->create<luci::CircleFloorDiv>();
+ {
+ _floor_div->x(input());
+ _floor_div->y(_const);
+ }
+ output()->from(_floor_div);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x() { return _floor_div->x(); }
+
+ loco::Node *y() { return _floor_div->y(); }
+
+private:
+ luci::CircleFloorDiv *_floor_div = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+class RsqrtTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _rsqrt = g()->nodes()->create<luci::CircleRsqrt>();
+ {
+ _rsqrt->x(input());
+ }
+ output()->from(_rsqrt);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleRsqrt *_rsqrt = nullptr;
+};
+
+class SqrtTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _sqrt = g()->nodes()->create<luci::CircleSqrt>();
+ {
+ _sqrt->x(input());
+ }
+ output()->from(_sqrt);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class EluTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _elu = g()->nodes()->create<luci::CircleElu>();
+ {
+ _elu->features(input());
+ }
+ output()->from(_elu);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleElu *_elu = nullptr;
+};
+
+class PowTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+
+ _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _pow = g()->nodes()->create<luci::CirclePow>();
+ {
+ _pow->x(input());
+ _pow->y(_const);
+ }
+ output()->from(_pow);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x() { return _pow->x(); }
+
+ loco::Node *y() { return _pow->y(); }
+
+private:
+ luci::CirclePow *_pow = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+class ResizeBilinearTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({1, 4, 4, 1}, {1, 8, 8, 1});
+
+ _size = create_const<Type::S32, int32_t>(g(), {2}, {8, 8});
+ _resize_bilinear = g()->nodes()->create<luci::CircleResizeBilinear>();
+ {
+ _resize_bilinear->input(input());
+ _resize_bilinear->size(_size);
+ }
+ output()->from(_resize_bilinear);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleResizeBilinear *_resize_bilinear = nullptr;
+ luci::CircleConst *_size = nullptr;
+};
+
+} // namespace
+
+// Quantize and verify with given configurations
+#define TEST_WITH_GRAPH(graph, type, granularity) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ EXPECT_NO_THROW(quantize_and_verify(g.g(), type, granularity)); \
+ } while (0)
+
+// Quantize and verify with wrong type
+#define TEST_WITH_WRONG_TYPE(graph, type, granularity, wrong_dtype) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ EXPECT_ANY_THROW(quantize_and_verify_with_wrong_type(&g, type, granularity, wrong_dtype)); \
+ } while (0)
+
+// Quantize and verify with wrong granularity
+#define TEST_WITH_WRONG_GRANULARITY(graph, type, granularity) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ EXPECT_ANY_THROW(quantize_and_verify_with_wrong_granularity(&g, type, granularity)); \
+ } while (0)
+
+// Quantize and verify with wrong granularity
+// Users can specify the test target
+#define TEST_WITH_WRONG_GRANULARITY_TARGET(graph, type, granularity, target) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ auto node = loco::must_cast<luci::CircleNode *>(target); \
+ EXPECT_ANY_THROW(quantize_and_verify_with_wrong_granularity(&g, type, granularity, node)); \
+ } while (0)
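+
+// Illustrative sketch (not one of the actual tests below): the TARGET variant lets a test point
+// at a specific node of the graph under test, e.g. the const operand of a concatenation:
+//
+//   TEST_WITH_WRONG_GRANULARITY_TARGET(ConcatenationTestGraph, Type::U8,
+//                                      Granularity::LayerWise, g._param);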
+
+// Test a local helper function
+TEST(QuantizedModelVerifierTest, LocalCreateDummyConst)
+{
+ loco::Graph g;
+
+ EXPECT_NO_THROW(create_dummy_const<Type::FLOAT32>(&g, {32, 32}));
+}
+
+TEST(QuantizedModelVerifierTest, LocalCreateConst)
+{
+ loco::Graph g;
+ std::initializer_list<float> values = {0.1, 0, -5, 100};
+ luci::CircleConst *node = create_const<Type::FLOAT32, float>(&g, {2, 2}, values);
+
+ uint32_t index = 0;
+ for (auto val : values)
+ {
+ EXPECT_EQ(node->at<Type::FLOAT32>(index++), val);
+ }
+}
+
+TEST(QuantizedModelVerifierTest, InstanceNorm)
+{
+ TEST_WITH_GRAPH(InstanceNormTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, InstanceNorm_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(InstanceNormTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, InstanceNorm_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(InstanceNormTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Logistic)
+{
+ TEST_WITH_GRAPH(LogisticTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(LogisticTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(LogisticTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Logistic_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(LogisticTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(LogisticTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(LogisticTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Logistic_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(LogisticTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(LogisticTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(LogisticTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Softmax)
+{
+ TEST_WITH_GRAPH(SoftmaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Softmax_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SoftmaxTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Softmax_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SoftmaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToBatchND)
+{
+ TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToBatchND_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToBatchND_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToDepth)
+{
+ TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToDepth_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToDepth_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Slice)
+{
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Slice_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S32>, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise, Type::U8);
+
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Slice_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Split)
+{
+ TEST_WITH_GRAPH(SplitTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SplitTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SplitTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Split_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SplitTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SplitTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SplitTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Split_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SplitTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SplitTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SplitTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, StridedSlice)
+{
+ TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, StridedSlice_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(StridedSliceTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, StridedSlice_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(StridedSliceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ArgMax)
+{
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ArgMax_wrong_dimension_type_NEG)
+{
+ ArgMaxTestGraph<Type::S32> g;
+ g.init();
+ luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, Type::U8, Granularity::LayerWise);
+ pass.run(g.g());
+
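+  // Corrupt the quantized graph: ArgMax's dimension input must keep an integer dtype, so
+  // forcing it to U8 should make the verifier throw.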
+ g._dimension->dtype(Type::U8);
+
+ luci::QuantizedModelVerifier verifier(Type::U8, Granularity::LayerWise);
+ EXPECT_ANY_THROW(verifier.verify(g.g()));
+}
+
+TEST(QuantizedModelVerifierTest, ArgMax_wrong_input_granularity_NEG)
+{
+ ArgMaxTestGraph<Type::S32> g;
+ g.init();
+
+ luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, Type::U8, Granularity::LayerWise);
+ pass.run(g.g());
+
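+  // Tamper with the input's quantization parameters via the insert_scale_zp helper so they no
+  // longer look like valid layer-wise parameters.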
+ insert_scale_zp(loco::must_cast<luci::CircleNode *>(g._argmax->input()), 1.0, 1);
+
+ luci::QuantizedModelVerifier verifier(Type::U8, Granularity::LayerWise);
+ EXPECT_ANY_THROW(verifier.verify(g.g()));
+}
+
+TEST(QuantizedModelVerifierTest, BatchToSpaceND)
+{
+ TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, BatchToSpaceND_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, BatchToSpaceND_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, DepthToSpace)
+{
+ TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, DepthToSpace_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, DepthToSpace_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Concatenation)
+{
+ TEST_WITH_GRAPH(ConcatenationTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Concatenation_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ConcatenationTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Concatenation_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(ConcatenationTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, LogicalOr)
+{
+ TEST_WITH_GRAPH(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::U8,
+ Granularity::LayerWise);
+ TEST_WITH_GRAPH(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::U8,
+ Granularity::ChannelWise);
+ TEST_WITH_GRAPH(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::S16,
+ Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, LogicalOr_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::U8,
+ Granularity::LayerWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::U8,
+ Granularity::ChannelWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::S16,
+ Granularity::ChannelWise, Type::S16);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Reshape)
+{
+ TEST_WITH_GRAPH(ReshapeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ReshapeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ReshapeTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Reshape_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ReshapeTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ReshapeTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ReshapeTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Reshape_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(ReshapeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(ReshapeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(ReshapeTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Tanh)
+{
+ TEST_WITH_GRAPH(TanhTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(TanhTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(TanhTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Tanh_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(TanhTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(TanhTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(TanhTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Tanh_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(TanhTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(TanhTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(TanhTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pad)
+{
+ TEST_WITH_GRAPH(PadTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(PadTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(PadTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pad_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(PadTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PadTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PadTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pad_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(PadTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(PadTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(PadTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Transpose)
+{
+ TEST_WITH_GRAPH(TransposeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(TransposeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(TransposeTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Transpose_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(TransposeTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(TransposeTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(TransposeTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Transpose_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(TransposeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(TransposeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(TransposeTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Floor)
+{
+ TEST_WITH_GRAPH(FloorTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(FloorTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(FloorTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Floor_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(FloorTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(FloorTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(FloorTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Floor_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(FloorTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(FloorTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(FloorTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, GreaterEqual)
+{
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::LayerWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::S16,
+ Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, GreaterEqual_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::LayerWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::ChannelWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::S16,
+ Granularity::ChannelWise, Type::S16);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, GreaterEqual_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::S16,
+ Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::S16,
+ Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Greater)
+{
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreater>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Greater_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8, Granularity::LayerWise,
+ Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::ChannelWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreater>, Type::S16,
+ Granularity::ChannelWise, Type::S16);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Greater_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::S16,
+ Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::S16,
+ Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, NotEqual)
+{
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, NotEqual_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::LayerWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::ChannelWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::S16,
+ Granularity::ChannelWise, Type::S16);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, NotEqual_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::S16,
+ Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::S16,
+ Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Div)
+{
+ TEST_WITH_GRAPH(DivTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(DivTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(DivTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Div_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(DivTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(DivTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(DivTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Div_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::U8, Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::U8, Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, FloorDiv)
+{
+ TEST_WITH_GRAPH(FloorDivTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(FloorDivTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(FloorDivTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, FloorDiv_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(FloorDivTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(FloorDivTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(FloorDivTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, FloorDiv_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::U8, Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::U8, Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Rsqrt)
+{
+ TEST_WITH_GRAPH(RsqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(RsqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(RsqrtTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Rsqrt_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(RsqrtTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(RsqrtTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(RsqrtTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Rsqrt_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(RsqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(RsqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(RsqrtTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Sqrt)
+{
+ TEST_WITH_GRAPH(SqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SqrtTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Sqrt_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SqrtTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SqrtTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SqrtTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Sqrt_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SqrtTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Elu)
+{
+ TEST_WITH_GRAPH(EluTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(EluTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(EluTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Elu_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(EluTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(EluTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(EluTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Elu_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(EluTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(EluTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(EluTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pow)
+{
+ TEST_WITH_GRAPH(PowTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(PowTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(PowTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pow_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(PowTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PowTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PowTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pow_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::U8, Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::U8, Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ResizeBilinear)
+{
+ TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ResizeBilinear_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ResizeBilinear_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+#undef TEST_WITH_GRAPH
+#undef TEST_WITH_WRONG_TYPE
+#undef TEST_WITH_WRONG_GRANULARITY
+#undef TEST_WITH_WRONG_GRANULARITY_TARGET
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+bool remove_redundant_reshape(luci::CircleReshape *node)
+{
+ auto pred_node = dynamic_cast<luci::CircleReshape *>(node->tensor());
+ if (pred_node == nullptr)
+ return false;
+
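+  // Bypass the predecessor Reshape: this Reshape now reads the original tensor directly, and
+  // the intermediate Reshape is left for dead-node cleanup.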
+ node->tensor(pred_node->tensor());
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ *
+ * [CircleNode]
+ * |
+ * [CircleReshape_1]
+ * |
+ * [CircleReshape_2]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * / \
+ * [CircleReshape_1] [CircleReshape_2]
+ * |
+ * [CircleNode]
+ **/
+bool RemoveRedundantReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
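+  // Visit every node reachable from the graph outputs and fold Reshape-of-Reshape chains.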
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto reshape_node = dynamic_cast<luci::CircleReshape *>(node))
+ {
+ if (remove_redundant_reshape(reshape_node))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveRedundantReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class RemoveRedundantReshape : public ::testing::Test
+{
+public:
+ RemoveRedundantReshape() {}
+
+ void createReshapeConst(luci::CircleReshape *target, const std::vector<int32_t> shape)
+ {
+ auto shape_const = g.nodes()->create<luci::CircleConst>();
+ shape_const->dtype(loco::DataType::S32);
+ shape_const->size<loco::DataType::S32>(shape.size());
+ shape_const->shape_status(luci::ShapeStatus::VALID);
+ shape_const->rank(1);
+ shape_const->dim(0).set(shape.size());
+ for (int32_t i = 0; i < shape.size(); i++)
+ {
+ shape_const->at<loco::DataType::S32>(i) = shape.at(i);
+ }
+ shape_const->name("shape_const");
+ target->shape(shape_const);
+ }
+
+ void buildGraph(const std::initializer_list<uint32_t> base_shape,
+ const std::vector<int32_t> first_shape, const std::vector<int32_t> second_shape)
+ {
+    // Create the graph input.
+ input = g.nodes()->create<luci::CircleInput>();
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ input->shape_status(luci::ShapeStatus::VALID);
+ input->rank(base_shape.size());
+ input->shape(base_shape);
+ input->name("input");
+
+ // Create first reshape.
+ first_reshape = g.nodes()->create<luci::CircleReshape>();
+ first_reshape->tensor(input);
+ first_reshape->name("Reshape");
+ createReshapeConst(first_reshape, first_shape);
+
+ // Create second reshape.
+ second_reshape = g.nodes()->create<luci::CircleReshape>();
+ second_reshape->tensor(first_reshape);
+ second_reshape->name("second_reshape");
+ createReshapeConst(second_reshape, second_shape);
+
+    // Connect the graph output.
+ output = g.nodes()->create<luci::CircleOutput>();
+ output->from(second_reshape);
+ output->name("output");
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleReshape *first_reshape = nullptr;
+ luci::CircleReshape *second_reshape = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(RemoveRedundantReshapePassTest, name)
+{
+ luci::RemoveRedundantReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(RemoveRedundantReshape, simple_case)
+{
+ buildGraph({4, 6}, {-1, 4, 6}, {1, -1, 2, 3});
+ luci::RemoveRedundantReshapePass pass;
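+  // Run the pass to a fixed point (until it reports no further change).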
+ while (pass.run(&g))
+ ;
+ int count = 0;
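+  // Count the Reshape nodes still reachable from the output; only the second one should remain.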
+ for (auto node : loco::active_nodes(loco::output_nodes(&g)))
+ {
+ if (auto reshape = dynamic_cast<luci::CircleReshape *>(node))
+ {
+ count++;
+ }
+ }
+ ASSERT_EQ(1, count);
+}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/RemoveRedundantTransposePass.h"
-
-#include <luci/IR/CircleNodes.h>
-
-namespace
-{
-
-/// @brief Return true if first_perm[second_perm[i]] == i
-bool check_perm(const luci::CircleConst *first_perm, const luci::CircleConst *second_perm)
-{
- assert(first_perm->rank() == 1);
- assert(second_perm->rank() == 1);
- assert(second_perm->size<loco::DataType::S32>() == first_perm->size<loco::DataType::S32>());
- for (int32_t i = 0; i < static_cast<int32_t>(first_perm->size<loco::DataType::S32>()); i++)
- {
- if (first_perm->at<loco::DataType::S32>(second_perm->at<loco::DataType::S32>(i)) != i)
- return false;
- }
- return true;
-}
-
-bool remove_consecutive_transpose_function(luci::CircleNode *node)
-{
- auto target_node = dynamic_cast<luci::CircleTranspose *>(node);
- if (target_node == nullptr)
- return false;
- auto pred_node = dynamic_cast<luci::CircleTranspose *>(target_node->a());
- if (pred_node == nullptr)
- return false;
- if (loco::succs(pred_node).size() != 1)
- return false;
-
- auto pred_perm = dynamic_cast<luci::CircleConst *>(target_node->perm());
- if (pred_perm == nullptr)
- return false;
-
- auto main_perm = dynamic_cast<luci::CircleConst *>(pred_node->perm());
- if (main_perm == nullptr)
- return false;
-
- auto main_node = loco::must_cast<luci::CircleNode *>(pred_node->a());
- if (check_perm(pred_perm, main_perm))
- {
- replace(node).with(main_node);
- }
- else
- {
- auto g = main_perm->graph();
- auto new_const_node = g->nodes()->create<luci::CircleConst>();
-
- new_const_node->dtype(loco::DataType::S32);
- new_const_node->rank(1);
- new_const_node->dim(0) = main_perm->dim(0);
- new_const_node->size<loco::DataType::S32>(main_perm->dim(0).value());
- new_const_node->shape_status(luci::ShapeStatus::VALID);
- for (uint32_t i = 0; i < main_perm->size<loco::DataType::S32>(); i++)
- {
- new_const_node->at<loco::DataType::S32>(i) =
- pred_perm->at<loco::DataType::S32>(main_perm->at<loco::DataType::S32>(i));
- }
- pred_node->perm(new_const_node);
- replace(node).with(pred_node);
- }
- return true;
-}
-
-} // namespace
-
-namespace luci
-{
-/**
- * BEFORE
- * |
- * [CircleNode] [CircleConst]
- * (main_node) (main_perm)
- * \ /
- * [CircleTranspose] [CircleConst]
- * (pred_node) (pred_perm)
- * \ /
- * [CircleTranspose]
- * (target_node)
- * |
- *
- * AFTER
- * <Optional Case>
- *
- * | | |
- * [CircleNode] [CircleConst] |
- * (main_node) (new_const_node) |
- * \ / or [CircleNode]
- * [CircleTranspose] (main_node)
- * (pred_node) |
- * | |
- *
- */
-bool RemoveRedundantTransposePass::run(loco::Graph *g)
-{
- bool changed = false;
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
- {
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- if (remove_consecutive_transpose_function(circle_node))
- {
- changed = true;
- break;
- }
- }
- return changed;
-}
-
-} // namespace luci
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "luci/Pass/RemoveRedundantTransposePass.h"
-
-#include <luci/IR/CircleNodes.h>
-
-#include <vector>
-
-#include <gtest/gtest.h>
-
-namespace
-{
-
-void setValue(luci::CircleConst *node, const std::vector<int> &v)
-{
- node->dtype(loco::DataType::S32);
- node->size<loco::DataType::S32>(v.size());
- node->rank(1);
- node->dim(0).set(v.size());
- for (int i = 0; i < v.size(); ++i)
- {
- node->at<loco::DataType::S32>(i) = v[i];
- }
-}
-
-/**
- * Type1
- * BEFORE
- * |
- * [CircleNode] [CircleConst]
- * \ /
- * [CircleTranspose] [CircleConst]
- * \ /
- * [CircleTranspose]
- * |
- *
- * AFTER
- * |
- * [CircleNode]
- * | Remove Both
- *
- * --------------------------------------------
- *
- * Type2
- * BEFORE
- * |
- * [CircleNode] [CircleConst]
- * \ /
- * [CircleTranspose] [CircleConst]
- * \ /
- * [CircleTranspose]
- * |
- *
- * AFTER
- * | |
- * [CircleNode] [CircleConst]
- * \ /
- * [CircleTranspose]
- * |
- *
- */
-void create_redundunt_transpose(loco::Graph *g, const std::vector<int32_t> &perm1,
- const std::vector<int32_t> &perm2)
-{
- assert(g);
-
- auto input = g->nodes()->create<luci::CircleInput>();
- auto graph_input = g->inputs()->create();
- input->index(graph_input->index());
-
- // Create perm1
- auto perm1_node = g->nodes()->create<luci::CircleConst>();
- setValue(perm1_node, perm1);
-
- auto transpose1 = g->nodes()->create<luci::CircleTranspose>();
- transpose1->dtype(loco::DataType::FLOAT32);
- transpose1->a(input);
- transpose1->perm(perm1_node);
-
- // Create perm2
- auto perm2_node = g->nodes()->create<luci::CircleConst>();
- setValue(perm2_node, perm2);
-
- auto transpose2 = g->nodes()->create<luci::CircleTranspose>();
- transpose2->dtype(loco::DataType::FLOAT32);
- transpose2->a(transpose1);
- transpose2->perm(perm2_node);
-
- // Output
- auto output = g->nodes()->create<luci::CircleOutput>();
- output->from(transpose2);
- auto graph_output = g->outputs()->create();
- output->index(graph_output->index());
-}
-
-} // namespace
-
-TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type1)
-{
- auto graph = loco::make_graph();
- create_redundunt_transpose(graph.get(), {1, 0, 2, 3}, {1, 0, 2, 3});
-
- luci::RemoveRedundantTransposePass pass;
- while (pass.run(graph.get()))
- ;
- luci::CircleTranspose *transpose_node = nullptr;
- for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
- {
- auto trans = dynamic_cast<luci::CircleTranspose *>(node);
- if (not trans)
- continue;
- transpose_node = trans;
- break;
- }
- // No transpose node is in graph.
- ASSERT_EQ(nullptr, transpose_node);
-}
-
-TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type2)
-{
- auto graph = loco::make_graph();
- create_redundunt_transpose(graph.get(), {0, 1, 3, 2}, {1, 0, 2, 3});
-
- luci::RemoveRedundantTransposePass pass;
- while (pass.run(graph.get()))
- ;
- luci::CircleTranspose *transpose_node = nullptr;
- for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
- {
- auto trans = dynamic_cast<luci::CircleTranspose *>(node);
- if (not trans)
- continue;
- transpose_node = trans;
- break;
- }
- // Just one transpose node, with updated perm constant.
- ASSERT_NE(nullptr, transpose_node);
- auto perm = loco::must_cast<luci::CircleConst *>(transpose_node->perm());
- ASSERT_EQ(1, perm->at<loco::DataType::S32>(0));
- ASSERT_EQ(0, perm->at<loco::DataType::S32>(1));
- ASSERT_EQ(3, perm->at<loco::DataType::S32>(2));
- ASSERT_EQ(2, perm->at<loco::DataType::S32>(3));
-}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantTransposePass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+/// @brief Return true if first_perm[second_perm[i]] == i
+bool check_perm(const luci::CircleConst *first_perm, const luci::CircleConst *second_perm)
+{
+ assert(first_perm->rank() == 1);
+ assert(second_perm->rank() == 1);
+ assert(second_perm->size<loco::DataType::S32>() == first_perm->size<loco::DataType::S32>());
+ for (int32_t i = 0; i < static_cast<int32_t>(first_perm->size<loco::DataType::S32>()); i++)
+ {
+ if (first_perm->at<loco::DataType::S32>(second_perm->at<loco::DataType::S32>(i)) != i)
+ return false;
+ }
+ return true;
+}
+
+bool remove_consecutive_transpose_function(luci::CircleTranspose *target_node)
+{
+ auto pred_node = dynamic_cast<luci::CircleTranspose *>(target_node->a());
+ if (pred_node == nullptr)
+ return false;
+
+ auto target_perm = dynamic_cast<luci::CircleConst *>(target_node->perm());
+ if (target_perm == nullptr)
+ return false;
+
+ auto pred_perm = dynamic_cast<luci::CircleConst *>(pred_node->perm());
+ if (pred_perm == nullptr)
+ return false;
+
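+  // Compose the two permutations: if they cancel to the identity, both Transposes are dropped;
+  // otherwise they are fused into a single Transpose that applies the composed permutation.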
+ auto main_node = loco::must_cast<luci::CircleNode *>(pred_node->a());
+ if (check_perm(target_perm, pred_perm))
+ {
+ replace(target_node).with(main_node);
+ }
+ else
+ {
+ auto name = target_node->name();
+ assert(name.length() > 0);
+
+ auto g = pred_perm->graph();
+ auto new_const_node = g->nodes()->create<luci::CircleConst>();
+
+ new_const_node->dtype(loco::DataType::S32);
+ new_const_node->rank(1);
+ new_const_node->dim(0) = pred_perm->dim(0);
+ new_const_node->size<loco::DataType::S32>(pred_perm->dim(0).value());
+ new_const_node->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t i = 0; i < pred_perm->size<loco::DataType::S32>(); i++)
+ {
+ new_const_node->at<loco::DataType::S32>(i) =
+ target_perm->at<loco::DataType::S32>(pred_perm->at<loco::DataType::S32>(i));
+ }
+ new_const_node->name(name + "/Transpose/perm");
+
+    // Create a new Transpose node that applies the composed permutation
+ auto new_transpose_node = g->nodes()->create<luci::CircleTranspose>();
+ new_transpose_node->dtype(target_node->dtype());
+ new_transpose_node->a(main_node);
+ new_transpose_node->perm(new_const_node);
+ new_transpose_node->name(name + "/Transpose");
+ luci::add_origin(new_transpose_node, luci::get_origin(target_node));
+
+ replace(target_node).with(new_transpose_node);
+ }
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * | (pred_perm)
+ * \ /
+ * [CircleTranspose] [CircleConst]
+ * (pred_node) (target_perm)
+ * \ /
+ * [CircleTranspose]
+ * (target_node)
+ * |
+ *
+ * AFTER
+ * | |
+ * [CircleNode] [CircleConst](new) |
+ * \ / or [CircleNode]
+ * [CircleTranspose](new) |
+ * | |
+ */
+bool RemoveRedundantTransposePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto transpose = dynamic_cast<luci::CircleTranspose *>(node))
+ {
+ if (remove_consecutive_transpose_function(transpose))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveRedundantTransposePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+void setValue(luci::CircleConst *node, const std::vector<int> &v)
+{
+ node->dtype(loco::DataType::S32);
+ node->size<loco::DataType::S32>(v.size());
+ node->rank(1);
+ node->dim(0).set(v.size());
+ for (int i = 0; i < v.size(); ++i)
+ {
+ node->at<loco::DataType::S32>(i) = v[i];
+ }
+}
+
+/**
+ * Remove consecutive Transposes
+ *
+ * Type 1: Remove both Transposes
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleNode]
+ * |
+ *
+ * --------------------------------------------
+ *
+ * Type 2: Merge into one Transpose
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ *
+ */
+void create_redundant_transpose(loco::Graph *g, const std::vector<int32_t> &perm1,
+ const std::vector<int32_t> &perm2)
+{
+ assert(g);
+
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto graph_input = g->inputs()->create();
+ input->index(graph_input->index());
+ input->name("input");
+
+ // Create perm1
+ auto perm1_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm1_node, perm1);
+ perm1_node->name("perm1_node");
+
+ auto transpose1 = g->nodes()->create<luci::CircleTranspose>();
+ transpose1->dtype(loco::DataType::FLOAT32);
+ transpose1->a(input);
+ transpose1->perm(perm1_node);
+ transpose1->name("transpose1");
+
+ // Create perm2
+ auto perm2_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm2_node, perm2);
+ perm2_node->name("perm2_node");
+
+ auto transpose2 = g->nodes()->create<luci::CircleTranspose>();
+ transpose2->dtype(loco::DataType::FLOAT32);
+ transpose2->a(transpose1);
+ transpose2->perm(perm2_node);
+ transpose2->name("transpose2");
+
+ // Output
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ output->from(transpose2);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+ output->name("output");
+}
+
+/**
+ * Remove consecutive Transposes with branching
+ *
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleConst] [CircleTranspose] [CircleConst]
+ * \ / \ /
+ * [CircleTranspose] [CircleTranspose]
+ * | |
+ * [CircleNode] [CircleNode]
+ * | |
+ *
+ * AFTER
+ *  Type 1: Remove all Transposes
+ * |
+ * [CircleNode]
+ * / \
+ * [CircleNode] [CircleNode]
+ * | |
+ *
+ *  Type 2: Remove both Transposes on one side and create a new one for the other side
+ * |
+ * [CircleNode] [CircleConst](new)
+ * / \ /
+ * / [CircleTranspose](new)
+ * | |
+ * [CircleNode] [CircleNode]
+ * | |
+ */
+void create_redundant_transpose_with_branch(loco::Graph *g, const std::vector<int32_t> &perm1,
+ const std::vector<int32_t> &perm2,
+ const std::vector<int32_t> &perm3)
+{
+ assert(g);
+
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto graph_input = g->inputs()->create();
+ input->dtype(loco::DataType::FLOAT32);
+ input->index(graph_input->index());
+ input->name("input");
+ graph_input->dtype(loco::DataType::FLOAT32);
+
+ graph_input->shape({4, 4, 4, 4});
+ input->shape({4, 4, 4, 4});
+
+ // Create perm1
+ auto perm1_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm1_node, perm1);
+ perm1_node->name("perm1_node");
+
+ auto transpose1 = g->nodes()->create<luci::CircleTranspose>();
+ transpose1->dtype(loco::DataType::FLOAT32);
+ transpose1->a(input);
+ transpose1->perm(perm1_node);
+ transpose1->name("transpose1");
+
+ // Create perm2
+ auto perm2_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm2_node, perm2);
+ perm2_node->name("perm2_node");
+
+ auto transpose2 = g->nodes()->create<luci::CircleTranspose>();
+ transpose2->dtype(loco::DataType::FLOAT32);
+ transpose2->a(transpose1);
+ transpose2->perm(perm2_node);
+ transpose2->name("transpose2");
+
+ // create perm3
+ auto perm3_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm3_node, perm3);
+ perm3_node->name("perm3_node");
+
+ auto transpose3 = g->nodes()->create<luci::CircleTranspose>();
+ transpose3->dtype(loco::DataType::FLOAT32);
+ transpose3->a(transpose1);
+ transpose3->perm(perm3_node);
+ transpose3->name("transpose3");
+
+ // Output
+ auto output1 = g->nodes()->create<luci::CircleOutput>();
+ output1->from(transpose2);
+ output1->name("output1");
+ auto output2 = g->nodes()->create<luci::CircleOutput>();
+ output2->from(transpose3);
+ output2->name("output2");
+ auto graph_output1 = g->outputs()->create();
+ output1->index(graph_output1->index());
+ auto graph_output2 = g->outputs()->create();
+ output2->index(graph_output2->index());
+ output1->dtype(loco::DataType::FLOAT32);
+ output2->dtype(loco::DataType::FLOAT32);
+ graph_output1->dtype(loco::DataType::FLOAT32);
+ graph_output2->dtype(loco::DataType::FLOAT32);
+ output1->shape({4, 4, 4, 4});
+ output2->shape({4, 4, 4, 4});
+ graph_output1->shape({4, 4, 4, 4});
+ graph_output2->shape({4, 4, 4, 4});
+}
+
+} // namespace
+
+TEST(RemoveRedundantTransposePassTest, name)
+{
+ luci::RemoveRedundantTransposePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type1)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose(graph.get(), {1, 0, 2, 3}, {1, 0, 2, 3});
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ // No transpose node is in graph.
+ ASSERT_EQ(nullptr, transpose_node);
+}
+
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type2)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose(graph.get(), {0, 1, 3, 2}, {1, 0, 2, 3});
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ // Just one transpose node, with updated perm constant.
+ ASSERT_NE(nullptr, transpose_node);
+ auto perm = loco::must_cast<luci::CircleConst *>(transpose_node->perm());
+ ASSERT_EQ(1, perm->at<loco::DataType::S32>(0));
+ ASSERT_EQ(0, perm->at<loco::DataType::S32>(1));
+ ASSERT_EQ(3, perm->at<loco::DataType::S32>(2));
+ ASSERT_EQ(2, perm->at<loco::DataType::S32>(3));
+}
+
+/**
+ * @brief Test case where the output of the first Transpose feeds more than one operation.
+ */
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_with_branch_remove_case)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose_with_branch(graph.get(), {1, 0, 2, 3}, {1, 0, 2, 3}, {1, 0, 2, 3});
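+  // All three permutations swap the first two axes and are self-inverse, so both branches
+  // cancel completely and no Transpose should survive.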
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ // No transpose node is in graph.
+ ASSERT_EQ(nullptr, transpose_node);
+}
+
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_with_branch_leave_one)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose_with_branch(graph.get(), {1, 0, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2});
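+  // One branch cancels completely; the other is fused into a single Transpose whose composed
+  // permutation is {1, 0, 3, 2}, as asserted below.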
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ ASSERT_NE(nullptr, transpose_node);
+ auto perm = loco::must_cast<luci::CircleConst *>(transpose_node->perm());
+ ASSERT_EQ(1, perm->at<loco::DataType::S32>(0));
+ ASSERT_EQ(0, perm->at<loco::DataType::S32>(1));
+ ASSERT_EQ(3, perm->at<loco::DataType::S32>(2));
+ ASSERT_EQ(2, perm->at<loco::DataType::S32>(3));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessaryReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+bool remove_no_effect_reshape(luci::CircleNode *node)
+{
+ auto target_node = dynamic_cast<luci::CircleReshape *>(node);
+ if (target_node == nullptr)
+ return false;
+
+ auto new_shape = dynamic_cast<luci::CircleConst *>(target_node->shape());
+ if (new_shape == nullptr)
+ return false;
+
+ // Compare updated shape and input shape.
+ auto input_node = loco::must_cast<luci::CircleNode *>(target_node->tensor());
+ if (input_node->rank() != new_shape->dim(0).value())
+ return false;
+ for (uint32_t i = 0; i < input_node->rank(); i++)
+ {
+    // If the updated dim is -1, its value is inferred, so skip the comparison
+    // TODO Check that the updated shape contains -1 at most once
+ if (new_shape->at<loco::DataType::S32>(i) == -1)
+ continue;
+    // If the input shape is dynamic, this Reshape cannot be removed.
+ if (!input_node->dim(i).known())
+ return false;
+    // If the input shape and the updated shape differ, it cannot be removed either.
+ if (input_node->dim(i).value() != static_cast<uint32_t>(new_shape->at<loco::DataType::S32>(i)))
+ return false;
+ }
+
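+  // Every dimension matches (or is a wildcard -1), so this Reshape is a no-op: forward its
+  // users directly to the input tensor.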
+ replace(target_node).with(input_node);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool RemoveUnnecessaryReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (remove_no_effect_reshape(circle_node))
+ {
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessaryReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class ReshapeGraphlet
+{
+public:
+ ReshapeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape, bool remove)
+ {
+ std::vector<uint32_t> shape_vector{input_shape};
+
+ auto dim0_val = remove ? shape_vector.size() : 1;
+ _reshape_shape = g->nodes()->create<luci::CircleConst>();
+ _reshape_shape->rank(1);
+ _reshape_shape->dim(0).set(dim0_val);
+ _reshape_shape->shape_status(luci::ShapeStatus::VALID);
+ _reshape_shape->dtype(loco::DataType::S32);
+
+ _reshape_shape->size<loco::DataType::S32>(dim0_val);
+ for (uint32_t i = 0; i < dim0_val; i++)
+ {
+ if (remove)
+ _reshape_shape->at<loco::DataType::S32>(i) = static_cast<int32_t>(shape_vector.at(i));
+ else
+ _reshape_shape->at<loco::DataType::S32>(i) = -1;
+ }
+ _reshape_shape->name("reshape_shape");
+
+ // Reshape create
+ auto newshape_rank = remove ? shape_vector.size() : 1;
+ _reshape = g->nodes()->create<luci::CircleReshape>();
+ _reshape->newShape()->rank(newshape_rank);
+ for (uint32_t i = 0; i < newshape_rank; i++)
+ {
+ if (remove)
+ _reshape->newShape()->dim(i) = static_cast<int32_t>(shape_vector.at(i));
+ else
+ _reshape->newShape()->dim(i) = -1;
+ }
+ _reshape->name("reshape");
+ }
+
+protected:
+ luci::CircleReshape *_reshape = nullptr;
+ luci::CircleConst *_reshape_shape = nullptr;
+};
+
+class ReshapeGraph : public TestIOGraph, public ReshapeGraphlet
+{
+public:
+ ReshapeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape, bool remove)
+ {
+ TestIOGraph::init(shape, shape);
+ ReshapeGraphlet::init(g(), shape, remove);
+
+ // connect graph
+ _reshape->tensor(input());
+ _reshape->shape(_reshape_shape);
+
+ output()->from(_reshape);
+ }
+};
+
+// TODO use ::testing::Test
+
+} // namespace
+
+TEST(RemoveUnnecessaryReshapePassTest, name)
+{
+ luci::RemoveUnnecessaryReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveUnnecessaryReshapePass, removed)
+{
+ ReshapeGraph g;
+
+ g.init({1, 2, 3, 4}, true);
+
+ // confirm graph has Reshape
+ auto reshape_node = luci::test::first_node<luci::CircleReshape>(g.g());
+ ASSERT_NE(nullptr, reshape_node);
+ luci::RemoveUnnecessaryReshapePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ // check Reshape is removed
+ reshape_node = luci::test::first_node<luci::CircleReshape>(g.g());
+ ASSERT_EQ(nullptr, reshape_node);
+}
+
+TEST(RemoveUnnecessaryReshapePass, not_removed_NEG)
+{
+ ReshapeGraph g;
+
+ g.init({1, 2, 3, 4}, false);
+
+ // confirm graph has Reshape
+ auto reshape_node = luci::test::first_node<luci::CircleReshape>(g.g());
+ ASSERT_NE(nullptr, reshape_node);
+ luci::RemoveUnnecessaryReshapePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ // check Reshape is NOT removed
+ reshape_node = luci::test::first_node<luci::CircleReshape>(g.g());
+ ASSERT_NE(nullptr, reshape_node);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessarySlicePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+/**
+ * @brief Return value in CircleConst.
+ * @details Return value in position on CircleConst with int64 format.
+ * Begin must be larger than or equal to 0. Size must be larger
+ * than or equal to -1.
+ */
+int64_t value_from_circle_const(const luci::CircleConst *node, uint32_t idx)
+{
+ assert(node->rank() == 1 && node->dim(0).value() > idx);
+ assert(node->dtype() == loco::DataType::S64 || node->dtype() == loco::DataType::S32);
+
+ if (node->dtype() == loco::DataType::S64)
+ return node->at<loco::DataType::S64>(idx);
+ return static_cast<int64_t>(node->at<loco::DataType::S32>(idx));
+}
+
+bool remove_no_effect_slice(luci::CircleNode *node)
+{
+ auto target_node = dynamic_cast<luci::CircleSlice *>(node);
+ if (target_node == nullptr)
+ return false;
+
+ auto begin_const = dynamic_cast<luci::CircleConst *>(target_node->begin());
+ if (begin_const == nullptr)
+ return false;
+
+ auto size_const = dynamic_cast<luci::CircleConst *>(target_node->size());
+ if (size_const == nullptr)
+ return false;
+
+  // Check that begin/size cover the whole input shape.
+ auto input_node = loco::must_cast<luci::CircleNode *>(target_node->input());
+ for (uint32_t i = 0; i < input_node->rank(); i++)
+ {
+ if (value_from_circle_const(begin_const, i) != 0)
+ return false;
+
+    int64_t size_value = value_from_circle_const(size_const, i);
+    if (size_value == -1)
+      continue;
+
+    // The dimension must be known before its value can be compared with the size.
+    if (!input_node->dim(i).known())
+      return false;
+    if (size_value != static_cast<int64_t>(input_node->dim(i).value()))
+      return false;
+ }
+ replace(target_node).with(input_node);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+/**
+ * BEFORE
+ *
+ * [CircleNode]
+ * |
+ * [CircleSlice]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * |
+ * [CircleNode]
+ *
+ * Slice OP has no effect if,
+ * 1. Static Shape : begin_const[idx] is 0 AND size_const[idx] is (-1 OR input_dimension[idx])
+ * 2. Dynamic Shape : begin_const[idx] is 0 AND size_const[idx] is -1
+ */
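+/**
+ * @note Illustrative example (not taken from the original sources): for an input of static
+ *       shape [2, 4, 3], begin = [0, 0, 0] with size = [2, -1, 3] selects the whole tensor,
+ *       so the Slice is removed; begin = [0, 1, 0] or size = [2, 2, 3] selects a sub-tensor,
+ *       so the Slice is kept.
+ */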
+bool RemoveUnnecessarySlicePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (remove_no_effect_slice(circle_node))
+ {
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveUnnecessarySlicePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SliceGraphlet
+{
+public:
+ SliceGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape, bool remove)
+ {
+ // Begin Create.
+ _begin = g->nodes()->create<luci::CircleConst>();
+ _begin->rank(1);
+ _begin->dim(0).set(input_shape.size());
+ _begin->shape_status(luci::ShapeStatus::VALID);
+ _begin->dtype(loco::DataType::S32);
+ _begin->size<loco::DataType::S32>(input_shape.size());
+ for (int i = 0; i < input_shape.size(); ++i)
+ _begin->at<loco::DataType::S32>(i) = remove ? 0 : 1;
+ _begin->name("begin");
+
+ // Size Create.
+ _size = g->nodes()->create<luci::CircleConst>();
+ _size->rank(1);
+ _size->dim(0).set(input_shape.size());
+ _size->shape_status(luci::ShapeStatus::VALID);
+ _size->dtype(loco::DataType::S32);
+ _size->size<loco::DataType::S32>(input_shape.size());
+ for (int i = 0; i < input_shape.size(); ++i)
+ _size->at<loco::DataType::S32>(i) = -1;
+ _size->name("size");
+
+ // Slice Node create.
+ _slice = g->nodes()->create<luci::CircleSlice>();
+ _slice->dtype(loco::DataType::S32);
+ _slice->name("slice");
+ }
+
+protected:
+ luci::CircleSlice *_slice = nullptr;
+ luci::CircleConst *_begin = nullptr;
+ luci::CircleConst *_size = nullptr;
+};
+
+class SliceGraph : public TestIOGraph, public SliceGraphlet
+{
+public:
+ SliceGraph() = default;
+
+public:
+ void init(const ShapeU32 shape, bool remove)
+ {
+ TestIOGraph::init(shape, shape);
+ SliceGraphlet::init(g(), shape, remove);
+
+ _slice->input(input());
+ _slice->begin(_begin);
+ _slice->size(_size);
+
+ output()->from(_slice);
+ }
+};
+
+} // namespace
+
+TEST(RemoveUnnecessarySlicePass, name)
+{
+ luci::RemoveUnnecessarySlicePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveUnnecessarySlicePass, removed)
+{
+ SliceGraph g;
+
+ g.init({2, 4, 2, 3}, true);
+
+ // confirm graph has Slice
+ auto slice_node = luci::test::first_node<luci::CircleSlice>(g.g());
+ ASSERT_NE(nullptr, slice_node);
+ luci::RemoveUnnecessarySlicePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ // check Slice is removed
+ slice_node = luci::test::first_node<luci::CircleSlice>(g.g());
+ ASSERT_EQ(nullptr, slice_node);
+}
+
+TEST(RemoveUnnecessarySlicePass, not_removed_NEG)
+{
+ SliceGraph g;
+
+ g.init({2, 4, 2, 3}, false);
+
+ // confirm graph has Slice
+ auto slice_node = luci::test::first_node<luci::CircleSlice>(g.g());
+ ASSERT_NE(nullptr, slice_node);
+ luci::RemoveUnnecessarySlicePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ // check Slice is NOT removed
+ slice_node = luci::test::first_node<luci::CircleSlice>(g.g());
+ ASSERT_NE(nullptr, slice_node);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessarySplitPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
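+/**
+ * @note Summary inferred from the code below: a Split whose num_split is 1 produces a single
+ *       CircleSplitOut that is identical to the Split's input, so the SplitOut is rewired
+ *       directly to that input and the Split becomes dead.
+ */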
+bool remove_unnecessary_split(luci::CircleNode *node)
+{
+ auto target_node = dynamic_cast<luci::CircleSplitOut *>(node);
+ if (target_node == nullptr)
+ return false;
+
+ auto split_node = dynamic_cast<luci::CircleSplit *>(target_node->input());
+ if (split_node == nullptr)
+ return false;
+
+ if (loco::succs(split_node).size() != 1)
+ return false;
+
+ if (split_node->num_split() == 1)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(split_node->input());
+ replace(target_node).with(input_node);
+ return true;
+ }
+ return false;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool RemoveUnnecessarySplitPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (remove_unnecessary_split(circle_node))
+ {
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessarySplitPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SplitGraphlet
+{
+public:
+ SplitGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, uint32_t nout)
+ {
+ assert(nout == 1 || nout == 2);
+
+ _dim = g->nodes()->create<luci::CircleConst>();
+ set_shape_vector(_dim, {0});
+ _dim->name("dim");
+
+ _split = g->nodes()->create<luci::CircleSplit>();
+ _split->num_split(nout);
+ _split->name("split");
+
+ _split_out_0 = g->nodes()->create<luci::CircleSplitOut>();
+ _split_out_0->index(0);
+ _split_out_0->name("split_out_0");
+
+ if (nout == 2)
+ {
+ _split_out_1 = g->nodes()->create<luci::CircleSplitOut>();
+ _split_out_1->index(1);
+ _split_out_1->name("split_out_1");
+ }
+ }
+
+protected:
+ luci::CircleSplit *_split = nullptr;
+ luci::CircleConst *_dim = nullptr;
+ luci::CircleSplitOut *_split_out_0 = nullptr;
+ luci::CircleSplitOut *_split_out_1 = nullptr;
+};
+
+class SplitOneGraph : public TestIGraphlet, public TestOGraphlet, public SplitGraphlet
+{
+public:
+ SplitOneGraph() = default;
+
+public:
+ void init()
+ {
+ TestIGraphlet::init(g(), {1});
+ TestOGraphlet::init(g(), {1});
+ SplitGraphlet::init(g(), 1);
+
+ _split->input(input());
+ _split->split_dim(_dim);
+ _split_out_0->input(_split);
+
+ output()->from(_split_out_0);
+ }
+};
+
+class SplitTwoGraph : public TestIGraphlet, public TestOsGraphlet<2>, public SplitGraphlet
+{
+public:
+ SplitTwoGraph() = default;
+
+public:
+ void init()
+ {
+ TestIGraphlet::init(g(), {1});
+ TestOsGraphlet<2>::init(g(), {{1}, {1}});
+ SplitGraphlet::init(g(), 2);
+
+ _split->input(input());
+ _split->split_dim(_dim);
+ _split_out_0->input(_split);
+ _split_out_1->input(_split);
+
+ output(0)->from(_split_out_0);
+ output(1)->from(_split_out_1);
+ }
+};
+
+// TODO use ::testing::Test
+
+} // namespace
+
+TEST(RemoveUnnecessarySplitPass, name)
+{
+ luci::RemoveUnnecessarySplitPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveUnnecessarySplitPass, create_unnecessary_split)
+{
+ SplitOneGraph g;
+
+ g.init();
+
+ luci::RemoveUnnecessarySplitPass pass;
+ while (pass.run(g.g()))
+ ;
+
+ auto split_node = luci::test::first_node<luci::CircleSplit>(g.g());
+  // No Split node remains in the graph.
+ ASSERT_EQ(nullptr, split_node);
+}
+
+TEST(RemoveUnnecessarySplitPass, create_unnecessary_split_NEG)
+{
+ SplitTwoGraph g;
+
+ g.init();
+
+ luci::RemoveUnnecessarySplitPass pass;
+ while (pass.run(g.g()))
+ ;
+
+ auto split_node = luci::test::first_node<luci::CircleSplit>(g.g());
+  // The Split node remains in the graph.
+ ASSERT_NE(nullptr, split_node);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+/**
+ * @brief Return value in CircleConst.
+ * @details Return value in position on CircleConst with int64 format.
+ */
+int64_t value_from_circle_const(const luci::CircleConst *node, uint32_t idx)
+{
+ assert(node->rank() == 1 && node->dim(0).value() > idx);
+ assert(node->dtype() == loco::DataType::S64 || node->dtype() == loco::DataType::S32);
+
+ if (node->dtype() == loco::DataType::S64)
+ return node->at<loco::DataType::S64>(idx);
+ return static_cast<int64_t>(node->at<loco::DataType::S32>(idx));
+}
+
+bool remove_no_effect_strided_slice(luci::CircleStridedSlice *target_node)
+{
+ auto begin_const = dynamic_cast<luci::CircleConst *>(target_node->begin());
+ if (begin_const == nullptr)
+ return false;
+
+ auto strides_const = dynamic_cast<luci::CircleConst *>(target_node->strides());
+ if (strides_const == nullptr)
+ return false;
+
+ auto end_const = dynamic_cast<luci::CircleConst *>(target_node->end());
+ if (end_const == nullptr)
+ return false;
+
+ auto input_node = loco::must_cast<luci::CircleNode *>(target_node->input());
+ for (uint32_t i = 0; i < input_node->rank(); i++)
+ {
+ if (value_from_circle_const(begin_const, i) != 0)
+ return false;
+
+ int64_t strides_value = value_from_circle_const(strides_const, i);
+ if (strides_value != 1)
+ return false;
+
+    int64_t end_value = value_from_circle_const(end_const, i);
+    if (end_value == -1)
+      continue;
+
+    // The dimension must be known before its value can be compared with the end.
+    if (!input_node->dim(i).known())
+      return false;
+
+    if (end_value != static_cast<int64_t>(input_node->dim(i).value()))
+      return false;
+ }
+
+  /**
+   * Check that the additional mask attributes are zero after the shape checks,
+   * so that StridedSlice operators this pass cannot handle are skipped.
+   */
+ if (target_node->new_axis_mask() != 0 || target_node->shrink_axis_mask() != 0)
+ return false;
+
+ replace(target_node).with(input_node);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+/**
+ * BEFORE
+ *
+ * [CircleNode]
+ * |
+ * [CircleStridedSlice]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * |
+ * [CircleNode] [CircleStridedSlice]
+ *
+ * StridedSlice OP has no effect if, for every index idx,
+ *    1. Static Shape  : begin_const[idx] is 0 AND strides_const[idx] is 1 AND
+ *                       end_const[idx] is (-1 OR input_dimension[idx])
+ *    2. Dynamic Shape : begin_const[idx] is 0 AND strides_const[idx] is 1 AND
+ *                       end_const[idx] is -1
+ *
+ * In addition, new_axis_mask and shrink_axis_mask must both be 0.
+ */
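+/**
+ * @note Illustrative example (not taken from the original sources): for an input of shape
+ *       [2, 4, 2, 3], begin = [0, 0, 0, 0], strides = [1, 1, 1, 1] and end = [2, 4, 2, 3]
+ *       (or all -1) cover the whole tensor, so the StridedSlice is removed; a non-zero
+ *       begin, a stride other than 1, or a smaller end keeps it.
+ */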
+bool RemoveUnnecessaryStridedSlicePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto target_node = dynamic_cast<luci::CircleStridedSlice *>(node);
+ if (target_node != nullptr)
+ if (remove_no_effect_strided_slice(target_node))
+ changed = true;
+ }
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class StridedSliceGraphlet
+{
+public:
+ StridedSliceGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape, bool remove)
+ {
+ // Begin create
+ _begin = g->nodes()->create<luci::CircleConst>();
+ _begin->rank(1);
+ _begin->dim(0).set(input_shape.size());
+ _begin->shape_status(luci::ShapeStatus::VALID);
+ _begin->dtype(loco::DataType::S32);
+ _begin->size<loco::DataType::S32>(input_shape.size());
+ for (int i = 0; i < input_shape.size(); ++i)
+ {
+ _begin->at<loco::DataType::S32>(i) = remove ? 0 : 1;
+ }
+
+ // Strides create
+ _strides = g->nodes()->create<luci::CircleConst>();
+ _strides->rank(1);
+ _strides->dim(0).set(input_shape.size());
+ _strides->shape_status(luci::ShapeStatus::VALID);
+ _strides->dtype(loco::DataType::S32);
+ _strides->size<loco::DataType::S32>(input_shape.size());
+ for (int i = 0; i < input_shape.size(); ++i)
+ {
+ _strides->at<loco::DataType::S32>(i) = remove ? 1 : -1;
+ }
+
+ std::vector<uint32_t> shape_vector{input_shape};
+
+ _end = g->nodes()->create<luci::CircleConst>();
+ _end->rank(1);
+ _end->dim(0).set(input_shape.size());
+ _end->shape_status(luci::ShapeStatus::VALID);
+ _end->dtype(loco::DataType::S32);
+ _end->size<loco::DataType::S32>(input_shape.size());
+ for (int i = 0; i < input_shape.size(); ++i)
+ {
+ if (remove)
+ _end->at<loco::DataType::S32>(i) = static_cast<int32_t>(shape_vector.at(i));
+ else
+ _end->at<loco::DataType::S32>(i) = -1;
+ }
+
+ // StridedSlice Node create
+ _strided_slice = g->nodes()->create<luci::CircleStridedSlice>();
+ _strided_slice->dtype(loco::DataType::S32);
+ }
+
+protected:
+ luci::CircleStridedSlice *_strided_slice = nullptr;
+ luci::CircleConst *_begin = nullptr;
+ luci::CircleConst *_strides = nullptr;
+ luci::CircleConst *_end = nullptr;
+};
+
+class StridedSliceGraph : public TestIOGraph, public StridedSliceGraphlet
+{
+public:
+ StridedSliceGraph() = default;
+
+public:
+ void init(const ShapeU32 shape, bool remove)
+ {
+ TestIOGraph::init(shape, shape);
+ StridedSliceGraphlet::init(g(), shape, remove);
+
+ _strided_slice->input(input());
+ _strided_slice->begin(_begin);
+ _strided_slice->strides(_strides);
+ _strided_slice->end(_end);
+
+ output()->from(_strided_slice);
+ }
+};
+
+} // namespace
+
+TEST(RemoveUnnecessaryStridedSlicePass, basic_case)
+{
+ StridedSliceGraph g;
+
+ g.init({2, 4, 2, 3}, true);
+
+ auto strided_slice_node = luci::test::first_node<luci::CircleStridedSlice>(g.g());
+ ASSERT_NE(nullptr, strided_slice_node);
+ luci::RemoveUnnecessaryStridedSlicePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ strided_slice_node = luci::test::first_node<luci::CircleStridedSlice>(g.g());
+ ASSERT_EQ(nullptr, strided_slice_node);
+}
+
+TEST(RemoveUnnecessaryStridedSlicePass, basic_fail_case_NEG)
+{
+ StridedSliceGraph g;
+
+ g.init({2, 4, 2, 3}, false);
+
+ auto strided_slice_node = luci::test::first_node<luci::CircleStridedSlice>(g.g());
+ ASSERT_NE(nullptr, strided_slice_node);
+ luci::RemoveUnnecessaryStridedSlicePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ strided_slice_node = luci::test::first_node<luci::CircleStridedSlice>(g.g());
+ ASSERT_NE(nullptr, strided_slice_node);
+}
#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h"
+#include "BatchNormPatternFinder.h"
+
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace
{
assert(gamma->rank() == 1);
auto channel_size = gamma->dim(0).value();
+ auto name = gamma->name();
+ assert(name.length() > 0);
+
// Channel-wise MUL is the same as DEPTHWISE_CONV2D with filter shape (1,1,1,channel_size)
auto weights = gamma->graph()->nodes()->create<luci::CircleConst>();
weights->dtype(loco::DataType::FLOAT32);
{
weights->at<loco::DataType::FLOAT32>(i) = gamma->at<loco::DataType::FLOAT32>(i);
}
+ weights->name(name + "_weights");
return weights;
}
assert(beta->rank() == 1);
auto channel_size = beta->dim(0).value();
+ auto name = beta->name();
+ assert(name.length() > 0);
+
// Channel-wise ADD is the same as bias (shape = (channel_size)) of DEPTHWISE_CONV2D
auto bias = beta->graph()->nodes()->create<luci::CircleConst>();
bias->dtype(loco::DataType::FLOAT32);
{
bias->at<loco::DataType::FLOAT32>(i) = beta->at<loco::DataType::FLOAT32>(i);
}
+ bias->name(name + "_bias");
return bias;
}
-bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::CircleConst *&beta)
-{
- auto x = loco::must_cast<luci::CircleNode *>(add->x());
- auto y = loco::must_cast<luci::CircleNode *>(add->y());
-
- luci::CircleMul *pred = nullptr;
- luci::CircleConst *constant = nullptr;
-
- if (x->opcode() == luci::CircleOpcode::CIRCLECONST && y->opcode() == luci::CircleOpcode::MUL)
- {
- pred = loco::must_cast<luci::CircleMul *>(y);
- constant = loco::must_cast<luci::CircleConst *>(x);
- }
- else if (x->opcode() == luci::CircleOpcode::MUL && y->opcode() == luci::CircleOpcode::CIRCLECONST)
- {
- pred = loco::must_cast<luci::CircleMul *>(x);
- constant = loco::must_cast<luci::CircleConst *>(y);
- }
- else
- {
- return false;
- }
-
- if (constant->rank() != 1)
- return false;
-
- auto channel_dim = constant->dim(0);
- // Assumption: Layout is channel-last
- if (!(channel_dim == add->dim(add->rank() - 1)))
- return false;
-
- mul = pred;
- beta = constant;
- return true;
-}
-
-// Check if mul is batchnorm mul
-bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node,
- luci::CircleConst *&gamma)
-{
- auto x = dynamic_cast<luci::CircleConst *>(mul->x());
- auto y = dynamic_cast<luci::CircleConst *>(mul->y());
-
- luci::CircleNode *pred = nullptr;
- luci::CircleConst *constant = nullptr;
-
- if (x != nullptr && y == nullptr)
- {
- pred = loco::must_cast<luci::CircleNode *>(mul->y());
- constant = x;
- }
- else if (x == nullptr && y != nullptr)
- {
- pred = loco::must_cast<luci::CircleNode *>(mul->x());
- constant = y;
- }
- else
- {
- return false;
- }
-
- if (constant->rank() != 1)
- return false;
-
- auto channel_dim = constant->dim(0);
- if (!(channel_dim == mul->dim(mul->rank() - 1)))
- return false;
-
- pred_node = pred;
- gamma = constant;
- return true;
-}
-
/**
* Replace channel-wise Mul/Add with DepthwiseConv2D
*
auto weights = create_weights_from_gamma(gamma);
auto bias = create_bias_from_beta(beta);
+ auto name = add->name();
+ assert(name.length() > 0);
+
auto dwconv = add->graph()->nodes()->create<luci::CircleDepthwiseConv2D>();
dwconv->input(pred_node);
dwconv->filter(weights);
dwconv->dilation()->w(1);
dwconv->dilation()->h(1);
dwconv->fusedActivationFunction(add->fusedActivationFunction());
+ dwconv->name(name + "/DepthwiseConv2D");
+ luci::add_origin(dwconv, luci::composite_origin({luci::get_origin(mul), luci::get_origin(add)}));
loco::replace(add).with(dwconv);
return true;
bool changed = false;
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- auto add = dynamic_cast<luci::CircleAdd *>(node);
- if (not add)
- continue;
-
- if (replace_mul_add_with_dwconv(add))
+ if (auto add = dynamic_cast<luci::CircleAdd *>(node))
{
- changed = true;
- break;
+ if (replace_mul_add_with_dwconv(add))
+ changed = true;
}
}
add->x(mul);
add->y(beta);
output->from(add);
+
+ input->name("input");
+ mul->name("mul");
+ gamma->name("gamma");
+ add->name("add");
+ beta->name("beta");
+ output->name("output");
}
public:
} // namespace
+TEST(ReplaceMulAddWithDepthwiseConv, name)
+{
+ luci::ReplaceMulAddWithDepthwiseConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
TEST(ReplaceMulAddWithDepthwiseConv, simple)
{
SimpleGraph g;
struct RequantizeNonConst final : public luci::CircleNodeMutableVisitor<bool>
{
RequantizeNonConst(loco::DataType input, loco::DataType output)
- : _input_type(input), _output_type(output)
+ : _input_type(input), _output_type(output)
{
}
struct RequantizeConst final : public luci::CircleNodeMutableVisitor<bool>
{
RequantizeConst(loco::DataType input, loco::DataType output)
- : _input_type(input), _output_type(output)
+ : _input_type(input), _output_type(output)
{
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RequantizePass.h"
+
+#include <gtest/gtest.h>
+
+TEST(RequantizePassTest, name)
+{
+ luci::RequantizePass pass(loco::DataType::FLOAT32, loco::DataType::U8);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
#include <luci/IR/CircleNodes.h>
#include <luci/IR/AttrFusedActFunc.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace
{
auto input = loco::must_cast<const luci::CircleCustomOut *>(addv2->inputs(broadcastTo_idx));
auto broadcastTo = loco::must_cast<luci::CircleCustom *>(input->input());
+ auto name = addv2->name();
+ assert(name.length() > 0);
+
auto add = addv2->graph()->nodes()->create<luci::CircleAdd>();
add->fusedActivationFunction(luci::FusedActFunc::NONE);
add->x(addv2->inputs(1 - broadcastTo_idx));
add->y(broadcastTo->inputs(0));
+ add->name(name + "/Add");
+ luci::add_origin(
+ add, luci::composite_origin({luci::get_origin(broadcastTo), luci::get_origin(addv2)}));
+
auto customOut = loco::succs(addv2);
assert(customOut.size() == 1);
replace(*customOut.begin()).with(add);
if (custom_code != "AddV2")
return false;
+ if (addv2->numInputs() != 2)
+ return false;
+
+  // check if the inputs have supported data types
+ for (uint32_t i = 0; i < addv2->numInputs(); i++)
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(addv2->inputs(i));
+ switch (input->dtype())
+ {
+ case loco::DataType::U8:
+ case loco::DataType::S8:
+ case loco::DataType::S16:
+ case loco::DataType::S32:
+ case loco::DataType::FLOAT32:
+ break;
+ default:
+ return false;
+ }
+ }
+
if (resolve_with_BroadcastTo(addv2))
return true;
+ auto name = addv2->name();
+ assert(name.length() > 0);
+
auto add = addv2->graph()->nodes()->create<luci::CircleAdd>();
add->fusedActivationFunction(luci::FusedActFunc::NONE);
add->x(addv2->inputs(0));
add->y(addv2->inputs(1));
+ add->name(name + "/Add");
+ luci::add_origin(add, luci::get_origin(addv2));
+
auto customOut = loco::succs(addv2);
assert(customOut.size() == 1);
replace(*customOut.begin()).with(add);
if (not cop)
continue;
- changed |= resolve_custom_op(cop);
+ if (resolve_custom_op(cop))
+ changed = true;
}
return changed;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpAddPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(ResolveCustomOpAddPassTest, name)
+{
+ luci::ResolveCustomOpAddPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
#include "flatbuffers/flexbuffers.h"
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace
{
if (custom_code == "BatchMatMulV2")
{
+ auto name = cop->name();
+ assert(name.length() > 0);
+
auto batch_matmul = cop->graph()->nodes()->create<luci::CircleBatchMatMul>();
// input
batch_matmul->x(cop->inputs(0));
auto map = flexbuffers::GetRoot(custom_options).AsMap();
batch_matmul->adj_x(map["adj_x"].AsBool());
batch_matmul->adj_y(map["adj_y"].AsBool());
+ batch_matmul->name(name + "/BatchMatMul");
+ luci::add_origin(batch_matmul, luci::get_origin(cop));
+
+ auto customOut = loco::succs(cop);
+ assert(customOut.size() == 1);
+ replace(*customOut.begin()).with(batch_matmul);
- replace(cop).with(batch_matmul);
return true;
}
+
return false;
}
namespace luci
{
+/**
+ * BEFORE
+ * | |
+ * [CircleNode] [CircleNode]
+ * \ /
+ * [CircleCustom]("BatchMatMulV2")
+ * |
+ * [CircleCustomOut]
+ * |
+ * [CircleNode]
+ * |
+ *
+ * AFTER
+ * | |
+ * [CircleNode] [CircleNode]
+ * \ /
+ * [CircleBatchMatMul]
+ * |
+ * [CircleNode]
+ * |
+ */
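+/**
+ * @note The flexbuffer custom options "adj_x" and "adj_y" of the Custom node are copied onto
+ *       CircleBatchMatMul::adj_x/adj_y; the BatchMatmulV2Graphlet in the pass's unit test
+ *       shows how such options are built with flexbuffers::Builder.
+ */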
bool ResolveCustomOpBatchMatMulPass::run(loco::Graph *g)
{
bool changed = false;
if (not cop)
continue;
- changed |= resolve_custom_op(cop);
+ if (resolve_custom_op(cop))
+ changed = true;
}
return changed;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include "flatbuffers/flatbuffers.h"
+#include "flatbuffers/flexbuffers.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+const int N = 1;
+const int C = 2;
+const int H_X = 1;
+const int W_X = 4;
+const int H_Y = 4;
+const int W_Y = 4;
+
+/**
+ * graph having Custom operator BatchMatMulV2
+ *
+ * [CircleInput] [CircleInput]
+ * \ /
+ * [CircleCustom]
+ * |
+ * [CircleCustomOut]
+ * |
+ * [CircleOutput]
+ */
+class BatchMatmulV2Graphlet
+{
+public:
+ BatchMatmulV2Graphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ // custom option
+ auto flatbuffer_builder =
+ std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
+ auto flex_buffers = std::make_unique<flexbuffers::Builder>();
+ size_t map_start = flex_buffers->StartMap();
+ flex_buffers->Bool("adj_x", false);
+ flex_buffers->Bool("adj_y", false);
+ flex_buffers->Int("T", 0 /* circle::TensorType_FLOAT32 */);
+ flex_buffers->EndMap(map_start);
+ flex_buffers->Finish();
+
+ // CircleCustom(BatchMatMulV2, adj_x=False, adj_y=False)
+ _batchmatmulv2 = g->nodes()->create<luci::CircleCustom>(2, 1);
+ _batchmatmulv2->custom_code("BatchMatMulV2");
+ _batchmatmulv2->custom_options(flex_buffers->GetBuffer());
+ _batchmatmulv2->shape({N, C, H_X, W_Y});
+ _batchmatmulv2->dtype(loco::DataType::FLOAT32);
+ _batchmatmulv2->name("batchmatmulv2");
+
+ // CircleCustomOut
+ _batchmatmulv2_out = g->nodes()->create<luci::CircleCustomOut>();
+ _batchmatmulv2_out->shape({N, C, H_X, W_Y});
+ _batchmatmulv2_out->dtype(loco::DataType::FLOAT32);
+ _batchmatmulv2_out->index(0);
+ }
+
+public:
+ luci::CircleCustom *batchmatmulv2() { return _batchmatmulv2; }
+
+protected:
+ luci::CircleCustom *_batchmatmulv2 = nullptr;
+ luci::CircleCustomOut *_batchmatmulv2_out = nullptr;
+};
+
+class BatchMatmulV2Graph : public TestIsGraphlet<2>,
+ public TestOGraphlet,
+ public BatchMatmulV2Graphlet
+{
+public:
+ BatchMatmulV2Graph() = default;
+
+ void init(void)
+ {
+ TestIsGraphlet<2>::init(g(), {{N, C, H_X, W_X}, {N, C, H_X, W_X}});
+ TestOGraphlet::init(g(), {N, C, H_X, W_Y});
+ BatchMatmulV2Graphlet::init(g());
+
+    // TODO How to set multiple shape vectors for TestIsGraphlet?
+ // update shape for second input
+ input(1)->shape({N, C, H_Y, W_Y});
+
+ // connect graph
+ _batchmatmulv2->inputs(0, input(0));
+ _batchmatmulv2->inputs(1, input(1));
+ _batchmatmulv2_out->input(_batchmatmulv2);
+
+ output()->from(_batchmatmulv2_out);
+ }
+};
+
+class BatchMatmulV2GraphTest : public ::testing::Test
+{
+public:
+ BatchMatmulV2Graph g;
+ luci::ResolveCustomOpBatchMatMulPass pass;
+};
+
+} // namespace
+
+TEST(ResolveCustomOpBatchMatMulPassTest, name)
+{
+ luci::ResolveCustomOpBatchMatMulPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+/**
+ * Optimized graph looks like below.
+ *
+ * [CircleInput]
+ * |
+ * [CircleBatchMatMul]
+ * |
+ * [CircleOutput]
+ */
+TEST_F(BatchMatmulV2GraphTest, simple_test)
+{
+ g.init();
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto batchmatmul = dynamic_cast<luci::CircleBatchMatMul *>(g.output()->from());
+ EXPECT_NE(nullptr, batchmatmul);
+
+ auto input_0 = dynamic_cast<luci::CircleInput *>(batchmatmul->x());
+ auto input_1 = dynamic_cast<luci::CircleInput *>(batchmatmul->y());
+ EXPECT_NE(nullptr, input_0);
+ EXPECT_NE(nullptr, input_1);
+}
+
+TEST_F(BatchMatmulV2GraphTest, wrong_condition_NEG)
+{
+ g.init();
+
+ // wrong custom code
+ g.batchmatmulv2()->custom_code("BatchMatMulv2"); // v is lower case
+ auto ret = pass.run(g.g());
+
+ EXPECT_EQ(false, ret);
+}
#include <loco/IR/DataTypeTraits.h>
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
#include <loco.h>
#include <oops/InternalExn.h>
-#include <loco/Service/ShapeInference.h>
-#include <loco/Service/TypeInference.h>
namespace
{
node->dim(i) = shape.at(i);
size *= shape.at(i);
}
+ node->shape_status(luci::ShapeStatus::VALID);
#define INIT_VALUES(DT) \
{ \
const auto S32 = loco::DataType::S32;
const auto FLOAT32 = loco::DataType::FLOAT32;
+ auto name = cop->name();
+ assert(name.length() > 0);
+
bool transpose_a = map["transpose_a"].AsBool();
bool transpose_b = map["transpose_b"].AsBool();
loco::Node *rhs = cop->inputs(1);
// Check that the type of the first input is known
- CHECK_OR_FALSE(loco::dtype_known(lhs));
- auto lhs_dtype = loco::dtype_get(cop->inputs(0));
+ auto lhs_dtype = loco::must_cast<luci::CircleNode *>(cop->inputs(0))->dtype();
+ CHECK_OR_FALSE(lhs_dtype != loco::DataType::Unknown);
// If transpose of first input is requested, its shape must be known
- CHECK_OR_FALSE(!transpose_a || loco::shape_known(lhs));
+ auto circle_lhs = loco::must_cast<luci::CircleNode *>(lhs);
+ CHECK_OR_FALSE(!transpose_a || circle_lhs->shape_status() == luci::ShapeStatus::VALID);
// and its rank should be at least 2
- CHECK_OR_FALSE(!transpose_a || loco::shape_get(lhs).as<loco::TensorShape>().rank() >= 2);
+ CHECK_OR_FALSE(!transpose_a || circle_lhs->rank() >= 2);
// Check that the shape of the 2nd input is known
- CHECK_OR_FALSE(loco::shape_known(rhs));
+ auto circle_rhs = loco::must_cast<luci::CircleNode *>(rhs);
+ CHECK_OR_FALSE(circle_rhs->shape_status() == luci::ShapeStatus::VALID);
// TODO as of 06/23/20 TFLite only supports rank 2 for 2nd input. Fix this once that changes!
- CHECK_OR_FALSE(loco::shape_get(rhs).as<loco::TensorShape>().rank() == 2);
+ CHECK_OR_FALSE(circle_rhs->rank() == 2);
// Check that input data type is supported
CHECK_OR_THROW(lhs_dtype == U8 || lhs_dtype == S16 || lhs_dtype == FLOAT32,
"Only UInt8, Int16 and Float32 data types are supported by MatMul");
if (transpose_a)
{
- auto a_shape = loco::shape_get(lhs).as<loco::TensorShape>();
// Create a permutation constant node
std::vector<uint32_t> perm;
- for (uint32_t i = 0; i < a_shape.rank(); ++i)
+ for (uint32_t i = 0; i < circle_lhs->rank(); ++i)
perm.push_back(i);
- std::swap(perm[a_shape.rank() - 1], perm[a_shape.rank() - 2]);
- auto perm_node = create_const_node(graph, S32, {a_shape.rank()}, perm);
+ std::swap(perm[circle_lhs->rank() - 1], perm[circle_lhs->rank() - 2]);
+ auto perm_node = create_const_node(graph, S32, {circle_lhs->rank()}, perm);
+ perm_node->name(name + "/lhs/Transpose/perm");
// Now make a transpose node
auto transpose_node = graph->nodes()->create<luci::CircleTranspose>();
transpose_node->a(lhs);
transpose_node->perm(perm_node);
+ transpose_node->name(name + "/lhs/Transpose");
+ luci::add_origin(transpose_node, luci::get_origin(cop));
lhs = transpose_node;
}
{
const std::vector<uint32_t> perm{1, 0};
auto perm_node = create_const_node(graph, S32, {2}, perm);
+ perm_node->name(name + "/rhs/Transpose/perm");
auto transpose_node = graph->nodes()->create<luci::CircleTranspose>();
transpose_node->a(rhs);
transpose_node->perm(perm_node);
+ transpose_node->name(name + "/rhs/Transpose");
+ luci::add_origin(transpose_node, luci::get_origin(cop));
rhs = transpose_node;
}
- // Make a constant zero-filled bias node
- auto b_shape = loco::shape_get(cop->inputs(1)).as<loco::TensorShape>();
- uint32_t bias_size = b_shape.dim(transpose_b ? 1 : 0).value();
- const std::vector<float> val(bias_size, .0f);
- auto bias_node = create_const_node(graph, lhs_dtype, {bias_size}, val);
+ auto empty_bias = graph->nodes()->create<luci::CircleOutputExclude>();
+ empty_bias->dtype(loco::DataType::FLOAT32); // Needed for type inference
+
auto fc_node = graph->nodes()->create<luci::CircleFullyConnected>();
fc_node->input(lhs);
fc_node->weights(rhs);
- fc_node->bias(bias_node);
+ fc_node->bias(empty_bias);
fc_node->fusedActivationFunction(luci::FusedActFunc::NONE);
+ fc_node->name(name + "/FullyConnected");
+ luci::add_origin(fc_node, luci::get_origin(cop));
- replace(cop).with(fc_node);
+ auto customOut = loco::succs(cop);
+ assert(customOut.size() == 1);
+ replace(*customOut.begin()).with(fc_node);
return true;
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpMatMulPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(ResolveCustomOpMatMulPassTest, name)
+{
+ luci::ResolveCustomOpMatMulPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/ShapeInferencePass.h"
-
-#include <luci/IR/CircleDialect.h>
-#include <luci/Service/CircleShapeInferenceRule.h>
-
-#include <loco.h>
-#include <loco/IR/CanonicalDialect.h>
-#include <loco/Service/CanonicalShapeInferenceRule.h>
-#include <loco/Service/ShapeInference.h>
-#include <loco/Service/MultiDialectShapeInferenceRule.h>
-
-namespace luci
-{
-
-bool ShapeInferencePass::run(luci::Module *m)
-{
- bool changed = false;
-
- for (size_t g = 0; g < m->size(); ++g)
- {
- if (run(m->graph(g)))
- changed = true;
- }
-
- return changed;
-}
-
-bool ShapeInferencePass::run(loco::Graph *g)
-{
- loco::CanonicalShapeInferenceRule canonical_rule;
- luci::CircleShapeInferenceRule circle_rule;
-
- loco::MultiDialectShapeInferenceRule rules;
-
- rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(luci::CircleDialect::get(), &circle_rule);
-
- return loco::apply(&rules).to(g);
-}
-
-} // namespace luci
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/ShapeSignatureInferencePass.h"
-
-#include <luci/IR/CircleShapeSignature.h>
-#include <luci/Service/CircleShapeSignatureInference.h>
-
-#include <loco.h>
-
-namespace luci
-{
-
-bool ShapeSignatureInferencePass::run(luci::Module *m)
-{
- bool changed = false;
-
- for (size_t g = 0; g < m->size(); ++g)
- {
- if (run(m->graph(g)))
- changed = true;
- }
-
- return changed;
-}
-
-bool ShapeSignatureInferencePass::run(loco::Graph *g)
-{
- luci::ssinf::Rule signature_inference_rule;
- bool changed = false;
-
- for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
- {
- luci::ShapeSignature shape_signature;
-
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- if (signature_inference_rule.infer(circle_node, shape_signature))
- {
- if (!(circle_node->shape_signature() == shape_signature))
- {
- circle_node->shape_signature(shape_signature);
- changed = true;
- }
- }
- }
-
- return changed;
-}
-
-} // namespace luci
{
auto the_weights = loco::must_cast<luci::CircleConst *>(fc->weights());
+ auto name = fc->name();
+ assert(name.length() > 0);
+
// create CircleConst where shuffled data will be stored
luci::CircleConst *new_weights = fc->graph()->nodes()->create<luci::CircleConst>();
new_weights->dtype(loco::DataType::FLOAT32);
{
new_weights->dim(r).set(the_weights->dim(r).value());
}
+ new_weights->name(name + "/shuffle_weight");
  // shuffle weights
const uint32_t MULTIPLE = 16;
for (uint32_t i = 0; i < MULTIPLE; i++)
{
new_weights->at<loco::DataType::FLOAT32>(index++) =
- the_weights->at<loco::DataType::FLOAT32>((r * MULTIPLE + i) * cols + c);
+ the_weights->at<loco::DataType::FLOAT32>((r * MULTIPLE + i) * cols + c);
}
}
}
fc->weights(new_weights);
fc->weights_format(luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32);
}
+
+ changed = true;
}
return changed;
#include <luci/IR/CircleNodes.h>
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
#include <gtest/gtest.h>
-void create_fc_net(loco::Graph *g)
+namespace
{
- assert(g);
-
- const uint32_t ROW = 16;
- const uint32_t COL = 2;
- const uint32_t elements_num = ROW * COL;
-
- // input
- auto input = g->nodes()->create<luci::CircleInput>();
- auto graph_input = g->inputs()->create();
- input->index(graph_input->index());
-
- // fc weights
- auto weights = g->nodes()->create<luci::CircleConst>();
- weights->dtype(loco::DataType::FLOAT32);
- weights->size<loco::DataType::FLOAT32>(elements_num);
- weights->rank(2);
- weights->dim(0).set(ROW);
- weights->dim(1).set(COL);
- for (uint32_t idx = 0; idx < elements_num; idx++)
+
+using namespace luci::test;
+
+class FCGraphlet
+{
+public:
+ FCGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 wshape)
{
- weights->at<loco::DataType::FLOAT32>(idx) = idx;
+ const uint32_t elements_num = num_elements(wshape);
+
+ // fc weights
+ _weights = g->nodes()->create<luci::CircleConst>();
+ _weights->dtype(loco::DataType::FLOAT32);
+ _weights->shape(wshape);
+ _weights->size<loco::DataType::FLOAT32>(elements_num);
+ for (uint32_t idx = 0; idx < elements_num; idx++)
+ {
+ _weights->at<loco::DataType::FLOAT32>(idx) = idx;
+ }
+ _weights->name("weights");
+
+ // fc
+ _fc = g->nodes()->create<luci::CircleFullyConnected>();
+ _fc->dtype(loco::DataType::FLOAT32);
+ _fc->name("fc");
}
- // fc
- auto fc = g->nodes()->create<luci::CircleFullyConnected>();
- fc->dtype(loco::DataType::FLOAT32);
- fc->input(input);
- fc->weights(weights);
-
- // output
- auto output = g->nodes()->create<luci::CircleOutput>();
- output->from(fc);
- auto graph_output = g->outputs()->create();
- output->index(graph_output->index());
-}
+protected:
+ luci::CircleFullyConnected *_fc = nullptr;
+ luci::CircleConst *_weights = nullptr;
+};
-TEST(ShuffleWeightTo16x1Float32PassTest, SimpleTest1)
+class FCGraph : public TestIGraphlet, public TestOGraphlet, public FCGraphlet
{
- auto graph = loco::make_graph();
- create_fc_net(graph.get());
+public:
+ FCGraph() = default;
- luci::CircleFullyConnected *fc_node = nullptr;
- for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ void init(const ShapeU32 shape, const ShapeU32 wshape)
{
- auto fc = dynamic_cast<luci::CircleFullyConnected *>(node);
- if (not fc)
- continue;
+ TestIGraphlet::init(g(), shape);
+ TestOGraphlet::init(g(), shape);
+ FCGraphlet::init(g(), wshape);
+
+ // connect graph
+ _fc->input(input());
+ _fc->weights(_weights);
- fc_node = fc;
- break;
+ output()->from(_fc);
}
+};
+
+} // namespace
+
+TEST(ShuffleWeightTo16x1Float32PassTest, name)
+{
+ luci::ShuffleWeightTo16x1Float32Pass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+const uint32_t ROW = 16;
+const uint32_t COL = 2;
+
+TEST(ShuffleWeightTo16x1Float32PassTest, SimpleTest1)
+{
+ FCGraph g;
+
+ g.init({ROW, COL}, {ROW, COL});
+
+ auto fc_node = luci::test::first_node<luci::CircleFullyConnected>(g.g());
ASSERT_NE(fc_node, nullptr);
auto weights = loco::must_cast<luci::CircleConst *>(fc_node->weights());
// before
ASSERT_EQ(15, weights->at<loco::DataType::FLOAT32>(15));
luci::ShuffleWeightTo16x1Float32Pass pass;
- while (pass.run(graph.get()))
+ while (pass.run(g.g()))
;
weights = loco::must_cast<luci::CircleConst *>(fc_node->weights());
ASSERT_EQ(28, weights->at<loco::DataType::FLOAT32>(14));
ASSERT_EQ(30, weights->at<loco::DataType::FLOAT32>(15));
}
+
+TEST(ShuffleWeightTo16x1Float32PassTest, invalid_weight_shape_NEG)
+{
+ FCGraph g;
+
+ g.init({ROW, COL}, {1, ROW, COL, 1});
+
+ auto fc_node = luci::test::first_node<luci::CircleFullyConnected>(g.g());
+ ASSERT_NE(fc_node, nullptr);
+
+ luci::ShuffleWeightTo16x1Float32Pass pass;
+ auto ret = pass.run(g.g());
+
+ ASSERT_FALSE(ret);
+}
+
+TEST(ShuffleWeightTo16x1Float32PassTest, invalid_weight_row16_NEG)
+{
+ FCGraph g;
+
+ g.init({COL, ROW}, {COL, ROW});
+
+ auto fc_node = luci::test::first_node<luci::CircleFullyConnected>(g.g());
+ ASSERT_NE(fc_node, nullptr);
+
+ luci::ShuffleWeightTo16x1Float32Pass pass;
+ auto ret = pass.run(g.g());
+
+ ASSERT_FALSE(ret);
+}
const std::vector<DimensionType> &format,
const std::vector<int32_t> &block_size,
const std::vector<int32_t> &block_map)
- : _dense_shape(shape), _traversal_order(traversal_order), _block_size(block_size),
- _block_map(block_map)
+ : _dense_shape(shape), _traversal_order(traversal_order), _block_size(block_size),
+ _block_map(block_map)
{
_dense_size = 1;
int32_t block_dim = 0;
const std::vector<int32_t> block_size = {4, 1};
const std::vector<int32_t> block_map = {0, 1};
EXPECT_THROW(
- luci::Sparsifier<int32_t>(dense_shape, traversal_order, format, block_size, block_map),
- std::out_of_range);
+ luci::Sparsifier<int32_t>(dense_shape, traversal_order, format, block_size, block_map),
+ std::out_of_range);
}
else if (_format.at(idx) == DimensionType::SPARSE_CSR)
{
sparsityparam->dim_metadata.emplace_back(
- DimensionType::SPARSE_CSR, /* dense size */ 0,
- /* array_segments */ SparseIndexVector{SparseIndexVectorType::U16,
- dim_metadata.at(idx * 2)},
- /* array_indices */ SparseIndexVector{SparseIndexVectorType::U16,
- dim_metadata.at(idx * 2 + 1)});
+ DimensionType::SPARSE_CSR, /* dense size */ 0,
+ /* array_segments */
+ SparseIndexVector{SparseIndexVectorType::U16, dim_metadata.at(idx * 2)},
+ /* array_indices */
+ SparseIndexVector{SparseIndexVectorType::U16, dim_metadata.at(idx * 2 + 1)});
}
}
for (uint32_t i = 0; i < _block_size.size(); i++)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SparsifyTensorPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(SparsifyTensorPassTest, name)
+{
+ std::vector<int32_t> to;
+ std::vector<luci::DimensionType> vdt;
+ std::vector<int32_t> bs;
+ std::vector<int32_t> bm;
+ luci::SparsifyTensorPass pass("", to, vdt, bs, bm);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
#include "luci/Pass/SubstitutePackToReshapePass.h"
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace
{
+int32_t unknown_dim_count(luci::CircleNode *node)
+{
+ int32_t count = 0;
+
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ if (!node->dim(i).known())
+ ++count;
+
+ return count;
+}
+
bool substitute_pack_to_reshape(luci::CircleNode *node)
{
auto target_node = dynamic_cast<luci::CirclePack *>(node);
if (axis < 0)
axis = axis + static_cast<int32_t>(value_node->rank()) + 1;
+ auto name = node->name();
+ assert(name.length() > 0);
+
auto graph = target_node->graph();
auto reshape_node = graph->nodes()->create<luci::CircleReshape>();
reshape_node->tensor(value_node);
+ reshape_node->name(name + "/Reshape");
+ luci::add_origin(reshape_node, luci::get_origin(node));
auto const_node = graph->nodes()->create<luci::CircleConst>();
const_node->dtype(loco::DataType::S32);
}
else if (i < axis)
{
- const_node->at<loco::DataType::S32>(i) = value_node->dim(i).value();
+ const_node->at<loco::DataType::S32>(i) =
+ value_node->dim(i).known() ? value_node->dim(i).value() : -1;
}
else
{
- const_node->at<loco::DataType::S32>(i) = value_node->dim(i - 1).value();
+ const_node->at<loco::DataType::S32>(i) =
+ value_node->dim(i - 1).known() ? value_node->dim(i - 1).value() : -1;
}
}
+ const_node->name(name + "/Reshape/shape");
reshape_node->shape(const_node);
replace(target_node).with(reshape_node);
return true;
{
/**
- * BEFORE
- * |
- * [CircleNode]
- * |
- * [CirclePack]
- * |
- * [CircleNode]
- * |
+ * BEFORE
+ * |
+ * [CircleNode]
+ * |
+ * [CirclePack]
+ * |
+ * [CircleNode]
+ * |
*
- * AFTER
- * |
- * [CircleNode] [CircleConst]
- * \ /
- * [CircleReshape]
+ * AFTER
* |
- * [CircleNode]
- * |
- *
+ * [CircleNode] [CircleConst]
+ * | \ /
+ * [CirclePack] [CircleReshape]
+ * |
+ * [CircleNode]
+ * |
*/
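+/**
+ * @note Illustrative example (not taken from the original sources): packing a single tensor
+ *       of shape [2, 3] with axis 0 is rewritten as a Reshape to [1, 2, 3]; with axis 1 the
+ *       new shape constant becomes [2, 1, 3]. Unknown input dimensions are written as -1.
+ */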
bool SubstitutePackToReshapePass::run(loco::Graph *g)
{
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- if (substitute_pack_to_reshape(circle_node))
+ if (unknown_dim_count(circle_node) <= 1 && substitute_pack_to_reshape(circle_node))
{
changed = true;
}
namespace
{
-/**
- * BEFORE
- * |
- * [CircleNode]
- * |
- * [CirclePack]
- * |
- * [CircleNode]
- * |
- *
- * AFTER
- * |
- * [CircleNode] [CircleConst]
- * \ /
- * [CircleReshape]
- * |
- * [CircleNode]
- * |
- *
- */
void create_substitute_pack_to_reshape(loco::Graph *g, const std::initializer_list<uint32_t> shape,
int32_t axis)
{
input->shape_status(luci::ShapeStatus::VALID);
input->rank(shape.size());
input->shape(shape);
+ input->name("input");
// Pack Node create.
auto pack = g->nodes()->create<luci::CirclePack>(1);
pack->values(0, input);
pack->axis(axis);
+ pack->name("pack");
// Output Connect.
auto output = g->nodes()->create<luci::CircleOutput>();
output->from(pack);
auto graph_output = g->outputs()->create();
output->index(graph_output->index());
+ output->name("output");
return;
}
} // namespace
+TEST(SubstitutePackToReshapePassTest, name)
+{
+ luci::SubstitutePackToReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
TEST(SubstitutePackToReshapePass, simple_case)
{
auto graph = loco::make_graph();
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstituteSqueezeToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+/**
+ * @brief return TRUE if all dims are known
+ * @note  This pass could be applied even when some of the dimensions are unknown.
+ *        For now, such cases are not handled; the logic can be updated later.
+ */
+bool can_squeeze_shape(const luci::CircleNode *node)
+{
+ for (uint32_t r = 0; r < node->rank(); ++r)
+ {
+ if (not node->dim(r).known())
+ return false;
+ }
+ return true;
+}
+
+/**
+ * @brief return a valid unsigned dim value in the range 0 .. (rank-1)
+ * @note  dim may range from -rank to (rank-1)
+ */
+uint32_t valid_unsigned_dim(uint32_t rank, int32_t dim)
+{
+ int32_t irank = static_cast<int32_t>(rank);
+ return dim >= 0 ? static_cast<uint32_t>(dim) : static_cast<uint32_t>(irank + dim);
+}
+
+/**
+ * @brief return TRUE if the input dim is 1 for every value in squeeze_dims
+ */
+bool is_valid_input(const luci::CircleNode *node, const std::vector<int32_t> &squeeze_dims)
+{
+ auto rank = node->rank();
+ for (auto dim : squeeze_dims)
+ {
+ auto udim = valid_unsigned_dim(rank, dim);
+ if (node->dim(udim).value() != 1)
+ return false;
+ }
+ return true;
+}
+
+/**
+ * @brief return shape vector from input
+ */
+std::vector<uint32_t> node_shape(const luci::CircleNode *input)
+{
+ std::vector<uint32_t> shape;
+ uint32_t rank = input->rank();
+ for (uint32_t r = 0; r < rank; ++r)
+ shape.push_back(input->dim(r).value());
+
+ return shape;
+}
+
+/**
+ * @brief return CircleConst ptr with values of new_shape
+ */
+luci::CircleConst *create_shape_const(loco::Graph *graph, const std::vector<uint32_t> &new_shape)
+{
+ // NOTE dim_size can be 0
+ uint32_t dim_size = static_cast<uint32_t>(new_shape.size());
+
+ auto shape_const = graph->nodes()->create<luci::CircleConst>();
+
+ // const shape/dtype
+ shape_const->dtype(loco::DataType::S32);
+ if (dim_size > 0)
+ {
+ shape_const->rank(1);
+ shape_const->dim(0).set(dim_size);
+ }
+ else
+ shape_const->rank(0);
+ shape_const->shape_status(luci::ShapeStatus::VALID);
+
+ // constant values
+ shape_const->size<loco::DataType::S32>(dim_size);
+ for (uint32_t i = 0; i < dim_size; ++i)
+ shape_const->at<loco::DataType::S32>(i) = new_shape.at(i);
+
+ return shape_const;
+}
+
+bool substitute_squeeze_to_reshape(luci::CircleSqueeze *squeeze)
+{
+ assert(squeeze != nullptr);
+
+ auto input = loco::must_cast<luci::CircleNode *>(squeeze->input());
+  // the input node shape is needed and all dims should be known
+ if (input->shape_status() != luci::ShapeStatus::VALID)
+ return false;
+ if (not can_squeeze_shape(input))
+ return false;
+
+ // we will use squeeze shape for new shape
+ if (squeeze->shape_status() != luci::ShapeStatus::VALID)
+ return false;
+
+ auto squeeze_dims = squeeze->squeeze_dims();
+ if (not is_valid_input(input, squeeze_dims))
+ throw std::runtime_error("Invalid values in squeeze_dims: " + squeeze->name());
+
+ auto name = squeeze->name();
+ assert(name.length() > 0);
+
+ auto reshape_shape = node_shape(squeeze);
+ auto graph = squeeze->graph();
+ auto reshape = graph->nodes()->create<luci::CircleReshape>();
+ auto shape_const = create_shape_const(graph, reshape_shape);
+ reshape->name(name + "/Reshape");
+ luci::add_origin(reshape, luci::get_origin(squeeze));
+ shape_const->name(name + "/Reshape/shape");
+
+ // graph connection
+ reshape->tensor(input);
+ reshape->shape(shape_const);
+ replace(squeeze).with(reshape);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode]
+ * |
+ * [CircleSqueeze]
+ * |
+ * [CircleNode]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleNode] [CircleConst]
+ * | \ /
+ * [CircleSqueeze] [CircleReshape]
+ * |
+ * [CircleNode]
+ * |
+ */
+bool SubstituteSqueezeToReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto squeeze = dynamic_cast<luci::CircleSqueeze *>(node))
+ {
+ if (substitute_squeeze_to_reshape(squeeze))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
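
The Reshape created above takes its target shape verbatim from the shape-inferred CircleSqueeze output (node_shape plus create_shape_const). The standalone sketch below is not part of the patch and uses hypothetical names; it only reproduces the squeeze shape rule in plain C++ so the expected shape-const values in the tests that follow are easy to check by hand.

#include <algorithm>
#include <cstdint>
#include <stdexcept>
#include <vector>

// Sketch only: mirrors what shape inference + node_shape() yield for a Squeeze.
// An empty squeeze_dims drops every dimension of size 1; otherwise only the
// listed dimensions are dropped, and each of them must have size 1.
std::vector<int32_t> squeezed_shape(const std::vector<int32_t> &in,
                                    std::vector<int32_t> squeeze_dims)
{
  const auto rank = static_cast<int32_t>(in.size());
  for (auto &d : squeeze_dims)
    d = d >= 0 ? d : d + rank; // normalize negative dims, as valid_unsigned_dim() does

  std::vector<int32_t> out;
  for (int32_t i = 0; i < rank; ++i)
  {
    const bool listed =
      std::find(squeeze_dims.begin(), squeeze_dims.end(), i) != squeeze_dims.end();
    if (listed && in[i] != 1)
      throw std::runtime_error("Invalid values in squeeze_dims");
    if (listed || (squeeze_dims.empty() && in[i] == 1))
      continue; // dimension is squeezed away
    out.push_back(in[i]);
  }
  return out;
}
// e.g. squeezed_shape({1, 16, 1, 1}, {2, 3}) -> {1, 16}
//      squeezed_shape({1, 16, 1, 1}, {})     -> {16}
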
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/SubstituteSqueezeToReshapePass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using uilist = std::initializer_list<uint32_t>;
+using ilist = std::initializer_list<int32_t>;
+
+class PassTestGraph
+{
+public:
+ PassTestGraph() = default;
+
+public:
+ void init(const uilist shape_in, const uilist shape_out)
+ {
+ _graph_input = _g.inputs()->create();
+ _graph_output = _g.outputs()->create();
+
+ _input = _g.nodes()->create<luci::CircleInput>();
+ _input->shape(shape_in);
+ _input->shape_status(luci::ShapeStatus::VALID);
+ _input->name("input");
+
+ _output = _g.nodes()->create<luci::CircleOutput>();
+ _output->shape(shape_out);
+ _output->shape_status(luci::ShapeStatus::VALID);
+ _output->name("output");
+
+ _input->index(_graph_input->index());
+ _output->index(_graph_output->index());
+
+ auto input_shape = std::make_unique<loco::TensorShape>();
+ set(input_shape.get(), shape_in);
+ _graph_input->shape(std::move(input_shape));
+
+ auto output_shape = std::make_unique<loco::TensorShape>();
+ set(output_shape.get(), shape_out);
+ _graph_output->shape(std::move(output_shape));
+ }
+
+protected:
+ void set(loco::TensorShape *shape, const uilist &values)
+ {
+ uint32_t r = 0;
+ shape->rank(values.size());
+ for (auto v : values)
+ shape->dim(r++).set(v);
+ }
+
+public:
+ loco::Graph *g(void) { return &_g; }
+ luci::CircleOutput *output(void) { return _output; }
+
+protected:
+ loco::Graph _g;
+ loco::GraphInput *_graph_input = nullptr;
+ loco::GraphOutput *_graph_output = nullptr;
+ luci::CircleInput *_input = nullptr;
+ luci::CircleOutput *_output = nullptr;
+};
+
+class SubstituteSqueezeToReshapeGraph : public PassTestGraph
+{
+public:
+ SubstituteSqueezeToReshapeGraph() = default;
+
+public:
+ void init(const uilist shape_in, const uilist shape_out, const ilist squeeze_dims)
+ {
+ PassTestGraph::init(shape_in, shape_out);
+
+ _squeeze = _g.nodes()->create<luci::CircleSqueeze>();
+ _squeeze->input(_input);
+ _squeeze->squeeze_dims(squeeze_dims);
+ _squeeze->name("squeeze");
+
+ _output->from(_squeeze);
+ }
+
+protected:
+ luci::CircleSqueeze *_squeeze = nullptr;
+};
+
+class SubstituteSqueezeToReshapeTest : public ::testing::Test
+{
+public:
+ SubstituteSqueezeToReshapeTest() = default;
+
+ void run_pass(void)
+ {
+ while (_shapeinf.run(_graph.g()) || _pass.run(_graph.g()))
+ ;
+ }
+
+protected:
+ SubstituteSqueezeToReshapeGraph _graph;
+ luci::SubstituteSqueezeToReshapePass _pass;
+ luci::CircleShapeInferencePass _shapeinf;
+};
+
+} // namespace
+
+TEST(SubstituteSqueezeToReshapePassTest, name)
+{
+ luci::SubstituteSqueezeToReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, simple_with_squeeze_dims)
+{
+ _graph.init({1, 16, 1, 1}, {1, 16}, {2, 3});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+ auto reshape_shape = loco::must_cast<luci::CircleConst *>(reshape->shape());
+ ASSERT_EQ(2, reshape_shape->size<loco::DataType::S32>());
+ ASSERT_EQ(1, reshape_shape->at<loco::DataType::S32>(0));
+ ASSERT_EQ(16, reshape_shape->at<loco::DataType::S32>(1));
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, simple_without_squeeze_dims)
+{
+ _graph.init({1, 16, 1, 1}, {16}, {});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+ auto reshape_shape = loco::must_cast<luci::CircleConst *>(reshape->shape());
+ ASSERT_EQ(1, reshape_shape->size<loco::DataType::S32>());
+ ASSERT_EQ(16, reshape_shape->at<loco::DataType::S32>(0));
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, input_with_0_dims)
+{
+ _graph.init({1, 16, 0, 1}, {16, 0}, {});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+ auto reshape_shape = loco::must_cast<luci::CircleConst *>(reshape->shape());
+ ASSERT_EQ(2, reshape_shape->size<loco::DataType::S32>());
+ ASSERT_EQ(16, reshape_shape->at<loco::DataType::S32>(0));
+ ASSERT_EQ(0, reshape_shape->at<loco::DataType::S32>(1));
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, nothing_to_squeeze)
+{
+ _graph.init({2, 16, 16, 3}, {2, 16, 16, 3}, {});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, all_to_squeeze)
+{
+ _graph.init({1, 1}, {}, {});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, wrong_squeeze_dims_NEG)
+{
+ _graph.init({1, 16, 1, 1}, {1, 16, 1, 1}, {1});
+
+ // shape inference will throw for invalid squeeze_dims
+ EXPECT_THROW(run_pass(), std::exception);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstituteTransposeToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+/**
+ * @brief Convert a Transpose op to a Reshape op under certain conditions
+ * @details The Transpose op is converted when all of the conditions below hold:
+ *          1. its perm input is a CircleConst.
+ *          2. its input has fewer than two unknown dimensions.
+ *          3. ignoring dimensions of value 1, the remaining dimensions keep the
+ *             same order on the input and the output
+ *             eg) input shape = (126, 201, 1, 1) => (126, 201)
+ *                 output shape = (1, 126, 1, 201) => (126, 201)
+ */
+bool substitute_transpose_to_reshape(luci::CircleTranspose *node)
+{
+ auto perm_const = dynamic_cast<luci::CircleConst *>(node->perm());
+ if (perm_const == nullptr)
+ return false;
+
+ assert(perm_const->dtype() == loco::DataType::S32);
+
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->a());
+ if (perm_const->dim(0).value() != input_node->rank())
+ return false;
+
+  // If the input has more than one unknown dimension, the transpose is not changed.
+ int count = 0;
+ for (uint32_t i = 0; i < input_node->rank(); i++)
+ if (!input_node->dim(i).known())
+ count++;
+ if (count > 1)
+ return false;
+
+ uint32_t idx = 0;
+ auto size_items = perm_const->size<loco::DataType::S32>();
+ for (uint32_t i = 0; i < size_items; i++)
+ {
+ assert(perm_const->at<loco::DataType::S32>(i) >= 0 &&
+ perm_const->at<loco::DataType::S32>(i) < static_cast<int32_t>(input_node->rank()));
+ const auto perm_value = static_cast<uint32_t>(perm_const->at<loco::DataType::S32>(i));
+ if (input_node->dim(perm_value).known() && input_node->dim(perm_value).value() == 1)
+ continue;
+    // check that the indices of non-1 dims appear in increasing order
+ if (idx > perm_value)
+ return false;
+ idx = perm_value;
+ }
+
+ auto name = node->name();
+ assert(name.length() > 0);
+
+ auto new_const_node = node->graph()->nodes()->create<luci::CircleConst>();
+ new_const_node->dtype(loco::DataType::S32);
+ new_const_node->size<loco::DataType::S32>(size_items);
+ new_const_node->shape_status(luci::ShapeStatus::VALID);
+ new_const_node->rank(1);
+ new_const_node->dim(0).set(size_items);
+ for (uint32_t i = 0; i < size_items; i++)
+ {
+ if (input_node->dim(static_cast<uint32_t>(perm_const->at<loco::DataType::S32>(i))).known())
+ new_const_node->at<loco::DataType::S32>(i) = static_cast<int32_t>(
+ input_node->dim(static_cast<uint32_t>(perm_const->at<loco::DataType::S32>(i))).value());
+ else
+ new_const_node->at<loco::DataType::S32>(i) = -1;
+ }
+
+ auto new_reshape_node = node->graph()->nodes()->create<luci::CircleReshape>();
+ new_reshape_node->tensor(input_node);
+ new_reshape_node->shape(new_const_node);
+ new_reshape_node->name(name + "/Reshape");
+ luci::add_origin(new_reshape_node, luci::get_origin(node));
+ new_const_node->name(name + "/Reshape/shape");
+
+ replace(node).with(new_reshape_node);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ *
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleReshape]
+ * |
+ * [CircleNode]
+ *
+ */
+bool SubstituteTransposeToReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto circle_node = dynamic_cast<luci::CircleTranspose *>(node))
+ {
+ if (substitute_transpose_to_reshape(circle_node))
+ {
+ changed = true;
+ }
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
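
Condition 3 of the comment above is what the perm loop checks: a Transpose degenerates to a Reshape exactly when the permutation leaves the order of the non-1 dimensions untouched. A minimal standalone sketch of that predicate follows; it is illustrative only, the names are mine, and it assumes every dimension is known.

#include <cstdint>
#include <vector>

// Sketch of the order check performed above: size-1 dims may move anywhere,
// but the remaining dims must keep their original (increasing) positions.
bool transpose_is_reshape(const std::vector<uint32_t> &shape, const std::vector<int32_t> &perm)
{
  uint32_t last_kept = 0;
  for (auto p : perm)
  {
    const auto dim = static_cast<uint32_t>(p);
    if (shape[dim] == 1)
      continue; // size-1 dimensions can be reordered freely
    if (dim < last_kept)
      return false; // a non-1 dimension moved ahead of an earlier one
    last_kept = dim;
  }
  return true;
}
// transpose_is_reshape({126, 201, 1, 1}, {2, 0, 3, 1}) -> true  (126, 201 keep their order)
// transpose_is_reshape({126, 201, 1, 1}, {2, 1, 3, 0}) -> false (order becomes 201, 126)
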
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/SubstituteTransposeToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class SubstituteTransposeToReshapeTest : public ::testing::Test
+{
+public:
+ SubstituteTransposeToReshapeTest() {}
+
+ void buildGraph(const std::initializer_list<uint32_t> shape, const std::vector<int32_t> perm)
+ {
+ // Input Create.
+ input = g.nodes()->create<luci::CircleInput>();
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ input->shape_status(luci::ShapeStatus::VALID);
+ input->rank(shape.size());
+ input->shape(shape);
+ input->name("input");
+
+ // Permutation Create.
+ auto perm_const = g.nodes()->create<luci::CircleConst>();
+ perm_const->dtype(loco::DataType::S32);
+ perm_const->size<loco::DataType::S32>(perm.size());
+ perm_const->shape_status(luci::ShapeStatus::VALID);
+ perm_const->rank(1);
+ perm_const->dim(0).set(perm.size());
+ for (uint32_t i = 0; i < static_cast<uint32_t>(perm.size()); i++)
+ {
+ perm_const->at<loco::DataType::S32>(i) = perm.at(i);
+ }
+ perm_const->name("perm_const");
+
+ // Transpose Create.
+ auto transpose_node = g.nodes()->create<luci::CircleTranspose>();
+ transpose_node->a(input);
+ transpose_node->perm(perm_const);
+ transpose_node->name("transpose_node");
+
+ // Output Connect.
+ output = g.nodes()->create<luci::CircleOutput>();
+ output->from(transpose_node);
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+ output->name("output");
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(SubstituteTransposeToReshapePassTest, name)
+{
+ luci::SubstituteTransposeToReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(SubstituteTransposeToReshapeTest, simple_case)
+{
+  // Create a graph that transposes input {126, 201, 1, 1} with permutation {2, 0, 3, 1}
+ buildGraph({126, 201, 1, 1}, std::vector<int32_t>({2, 0, 3, 1}));
+ // With this input shape and permutation values, output shape will be [1, 126, 1, 201].
+ // The order of non-one values is unchanged (126, 201).
+ // So this Transpose op can be converted to Reshape op.
+ luci::SubstituteTransposeToReshapePass pass;
+ while (pass.run(&g))
+ ;
+
+ auto reshape_node = dynamic_cast<luci::CircleReshape *>(output->from());
+ auto transpose_node = dynamic_cast<luci::CircleTranspose *>(output->from());
+ ASSERT_NE(nullptr, reshape_node);
+ ASSERT_EQ(nullptr, transpose_node);
+ auto new_shape = loco::must_cast<luci::CircleConst *>(reshape_node->shape());
+ ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(0));
+ ASSERT_EQ(126, new_shape->at<loco::DataType::S32>(1));
+ ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(2));
+ ASSERT_EQ(201, new_shape->at<loco::DataType::S32>(3));
+}
+
+TEST_F(SubstituteTransposeToReshapeTest, failed_to_substitute_NEG)
+{
+  // Create a graph that transposes input {126, 201, 1, 1} with permutation {2, 1, 3, 0}
+ buildGraph({126, 201, 1, 1}, std::vector<int32_t>({2, 1, 3, 0}));
+ // With this input shape and permutation values, output shape will be [1, 201, 1, 126].
+ // The order of non-one values is changed (126, 201) -> (201, 126).
+ // So this Transpose op cannot be converted to Reshape op.
+ luci::SubstituteTransposeToReshapePass pass;
+ while (pass.run(&g))
+ ;
+
+ auto reshape_node = dynamic_cast<luci::CircleReshape *>(output->from());
+ auto transpose_node = dynamic_cast<luci::CircleTranspose *>(output->from());
+ ASSERT_EQ(nullptr, reshape_node);
+ ASSERT_NE(nullptr, transpose_node);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/TransformMinMaxToRelu6Pass.h"
+
+#include "helpers/NodeFiller.h"
+#include "helpers/TypeMapper.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+template <loco::DataType DT>
+bool is_scalar_with_value(luci::CircleConst *node, typename loco::DataTypeImpl<DT>::Type val)
+{
+ if (node->dtype() != DT)
+ return false;
+ if (node->rank() != 0)
+ return false;
+ if (node->size<DT>() != 1)
+ return false;
+ if (node->at<DT>(0) != static_cast<typename loco::DataTypeImpl<DT>::Type>(val))
+ return false;
+
+ return true;
+}
+
+/**
+ * BEFORE
+ * [CircleNode]
+ * |
+ * [CircleMinimum]
+ * |
+ * [CircleMaximum]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * |
+ * [CircleRelu6]
+ * |
+ * [CircleNode]
+ *
+ * NOTE Only max(min(input, 6), 0) pattern will be transformed.
+ */
+template <loco::DataType DT> bool transform_min_max_pattern(luci::CircleMaximum *maxi)
+{
+ if (not maxi)
+ return false;
+
+ if (maxi->dtype() != DT)
+ return false;
+
+ luci::CircleConst *maxi_const = nullptr;
+ luci::CircleMinimum *mini = nullptr;
+
+ // There are two ways Maximum takes inputs.
+ // 1. Maximum(x = CircleConst, y = CircleMinimum)
+ // 2. Maximum(x = CircleMinimum, y = CircleConst)
+ if (not luci::fill(&maxi_const, &mini).with_commutative_args_of(maxi))
+ return false;
+
+  // Maximum constant should be a scalar whose value is 0.
+ if (not is_scalar_with_value<DT>(maxi_const,
+ static_cast<typename loco::DataTypeImpl<DT>::Type>(0)))
+ return false;
+
+ luci::CircleConst *mini_const = nullptr;
+ loco::Node *mini_input = nullptr;
+
+  // There are two ways Minimum takes inputs.
+  // 1. Minimum(x = CircleConst, y = CircleNode)
+  // 2. Minimum(x = CircleNode, y = CircleConst)
+ if (not luci::fill(&mini_const, &mini_input).with_commutative_args_of(mini))
+ return false;
+
+  // Minimum constant should be a scalar whose value is 6.
+ if (not is_scalar_with_value<DT>(mini_const,
+ static_cast<typename loco::DataTypeImpl<DT>::Type>(6)))
+ return false;
+
+ auto name = maxi->name();
+ assert(name.length() > 0);
+
+ // Create Relu6 op
+ auto relu6 = mini->graph()->nodes()->create<luci::CircleRelu6>();
+ relu6->features(mini_input);
+ relu6->name(name + "/Relu6");
+ luci::add_origin(relu6, luci::composite_origin({luci::get_origin(maxi), luci::get_origin(mini)}));
+
+ replace(maxi).with(relu6);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool TransformMinMaxToRelu6Pass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto maxi = dynamic_cast<luci::CircleMaximum *>(node))
+ {
+ if (transform_min_max_pattern<loco::DataType::FLOAT32>(maxi))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
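
The rewrite is value-preserving because max(min(x, 6), 0) clamps x to [0, 6], which is exactly what Relu6 computes. A tiny self-contained check of that identity on a few sample values (not part of the patch):

#include <algorithm>
#include <cassert>
#include <initializer_list>

float relu6_ref(float x) { return std::min(std::max(x, 0.0f), 6.0f); }

int main()
{
  // The Minimum/Maximum pattern matched by the pass computes the same clamp.
  for (float x : {-3.0f, 0.0f, 2.5f, 6.0f, 10.0f})
    assert(std::max(std::min(x, 6.0f), 0.0f) == relu6_ref(x));
  return 0;
}
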
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/TransformMinMaxToRelu6Pass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Minimum-Maximum pattern graph
+ *
+ * [CircleInput] [CircleConst]
+ * \ /
+ * [CircleMinimum] [CircleConst]
+ * | /
+ * [CircleMaximum]
+ * |
+ * [CircleOutput]
+ */
+struct MinMaxGraph
+{
+ loco::Graph _g;
+ luci::CircleInput *_input = nullptr;
+ luci::CircleMinimum *_mini = nullptr;
+ luci::CircleConst *_mini_const = nullptr;
+ luci::CircleMaximum *_maxi = nullptr;
+ luci::CircleConst *_maxi_const = nullptr;
+ luci::CircleOutput *_output = nullptr;
+};
+
+class TransformMinMaxToRelu6PassTest : public ::testing::Test
+{
+protected:
+ virtual void SetUp()
+ {
+ const int N = 1;
+ const int H = 4;
+ const int W = 4;
+ const int C = 3;
+
+ // graph input and output
+ auto graph_input = _min_max_g._g.inputs()->create();
+ auto graph_output = _min_max_g._g.outputs()->create();
+
+ // CircleInput
+ _min_max_g._input = _min_max_g._g.nodes()->create<luci::CircleInput>();
+ _min_max_g._input->index(graph_input->index());
+ _min_max_g._input->shape({N, H, W, C});
+ _min_max_g._input->dtype(loco::DataType::FLOAT32);
+ _min_max_g._input->name("input");
+
+ // CircleConst
+ _min_max_g._mini_const = _min_max_g._g.nodes()->create<luci::CircleConst>();
+ _min_max_g._mini_const->shape({}); // scalar
+ _min_max_g._mini_const->dtype(loco::DataType::FLOAT32);
+ _min_max_g._mini_const->size<loco::DataType::FLOAT32>(1);
+ _min_max_g._mini_const->at<loco::DataType::FLOAT32>(0) = 6.;
+ _min_max_g._mini_const->name("mini_const");
+
+ // CircleMinimum
+ _min_max_g._mini = _min_max_g._g.nodes()->create<luci::CircleMinimum>();
+ _min_max_g._mini->x(_min_max_g._input);
+ _min_max_g._mini->y(_min_max_g._mini_const);
+ _min_max_g._mini->shape({N, H, W, C});
+ _min_max_g._mini->dtype(loco::DataType::FLOAT32);
+ _min_max_g._mini->name("mini");
+
+ // CircleConst
+ _min_max_g._maxi_const = _min_max_g._g.nodes()->create<luci::CircleConst>();
+    _min_max_g._maxi_const->shape({}); // scalar
+ _min_max_g._maxi_const->dtype(loco::DataType::FLOAT32);
+ _min_max_g._maxi_const->size<loco::DataType::FLOAT32>(1);
+ _min_max_g._maxi_const->at<loco::DataType::FLOAT32>(0) = 0.;
+ _min_max_g._maxi_const->name("maxi_const");
+
+ // CircleMaximum
+ _min_max_g._maxi = _min_max_g._g.nodes()->create<luci::CircleMaximum>();
+ _min_max_g._maxi->x(_min_max_g._mini);
+ _min_max_g._maxi->y(_min_max_g._maxi_const);
+ _min_max_g._maxi->shape({N, H, W, C});
+ _min_max_g._maxi->dtype(loco::DataType::FLOAT32);
+ _min_max_g._maxi->name("maxi");
+
+ // CircleOutput
+ _min_max_g._output = _min_max_g._g.nodes()->create<luci::CircleOutput>();
+ _min_max_g._output->index(graph_output->index());
+ _min_max_g._output->from(_min_max_g._maxi);
+ _min_max_g._output->shape({N, H, W, C});
+ _min_max_g._output->dtype(loco::DataType::FLOAT32);
+ _min_max_g._output->name("output");
+ }
+
+protected:
+ luci::TransformMinMaxToRelu6Pass _pass;
+ MinMaxGraph _min_max_g;
+};
+
+} // namespace
+
+TEST_F(TransformMinMaxToRelu6PassTest, name)
+{
+ auto const name = _pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+/**
+ * Optimized graph looks like below.
+ *
+ * [CircleInput]
+ * |
+ * [CircleRelu6]
+ * |
+ * [CircleOutput]
+ */
+TEST_F(TransformMinMaxToRelu6PassTest, simple_test)
+{
+ auto ret = _pass.run(&_min_max_g._g);
+ EXPECT_TRUE(ret);
+
+ auto relu6 = dynamic_cast<luci::CircleRelu6 *>(_min_max_g._output->from());
+ EXPECT_NE(nullptr, relu6);
+
+ auto input = dynamic_cast<luci::CircleInput *>(relu6->features());
+ EXPECT_NE(nullptr, input);
+}
+
+TEST_F(TransformMinMaxToRelu6PassTest, wrong_condition_NEG)
+{
+ _min_max_g._maxi_const->at<loco::DataType::FLOAT32>(0) = 2.;
+
+ auto ret = _pass.run(&_min_max_g._g);
+
+ EXPECT_FALSE(ret);
+}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/TypeInferencePass.h"
-
-#include <luci/IR/CircleDialect.h>
-#include <luci/Service/CircleTypeInferenceRule.h>
-
-#include <loco.h>
-#include <loco/IR/CanonicalDialect.h>
-#include <loco/Service/TypeInference.h>
-
-namespace luci
-{
-
-bool TypeInferencePass::run(luci::Module *m)
-{
- bool changed = false;
-
- for (size_t g = 0; g < m->size(); ++g)
- {
- if (run(m->graph(g)))
- changed = true;
- }
-
- return changed;
-}
-
-bool TypeInferencePass::run(loco::Graph *g)
-{
- loco::CanonicalTypeInferenceRule canonical_rule;
- luci::CircleTypeInferenceRule circle_rule;
-
- loco::MultiDialectTypeInferenceRule rules;
-
- rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(luci::CircleDialect::get(), &circle_rule);
-
- return loco::apply(&rules).to(g);
-}
-
-} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__
+#define __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Pass/QuantizationParameters.h>
+
+using Granularity = luci::QuantizationGranularity;
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+ if (not(ARG)) \
+ { \
+ return false; \
+ }
+
+namespace luci
+{
+
+/**
+ * @brief Verify the granularity of channel-wise quantized node
+ * @details
+ *
+ * Targets to verify
+ * - node's output (i.e., node itself)
+ * - node's inputs
+ */
+struct VerifyQuantizedNodeChannelWiseGranularity final : public luci::CircleNodeVisitor<bool>
+{
+private:
+ bool is_lwq(const loco::Node *node)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+
+ if (circle_node->quantparam() == nullptr)
+ return false;
+
+ if (circle_node->quantparam()->scale.size() != 1)
+ return false;
+
+ if (circle_node->quantparam()->zerop.size() != 1)
+ return false;
+
+ return true;
+ }
+
+ uint32_t rank(const loco::Node *node)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ return circle_node->rank();
+ }
+
+ bool is_cwq_const(const loco::Node *node, uint32_t channel_dim)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleConst *>(node);
+
+ assert(channel_dim < circle_node->rank()); // FIX_CALLER_UNLESS
+ auto channel_size = circle_node->dim(channel_dim).value();
+
+ if (circle_node->quantparam() == nullptr)
+ return false;
+
+ if (circle_node->quantparam()->quantized_dimension != static_cast<int32_t>(channel_dim))
+ return false;
+
+ if (circle_node->quantparam()->scale.size() != channel_size)
+ return false;
+
+ if (circle_node->quantparam()->zerop.size() != channel_size)
+ return false;
+
+ return true;
+ }
+
+private:
+ bool visit(const luci::CircleConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CircleConcatenation *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ for (uint32_t i = 0; i < node->numValues(); i++)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->values(i)));
+ }
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthToSpace *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthwiseConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 3))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CircleInstanceNorm *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->gamma(), rank(node->gamma()) - 1))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->beta(), rank(node->beta()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CirclePad *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ return true;
+ }
+
+ bool visit(const luci::CirclePRelu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->alpha(), rank(node->alpha()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CircleTransposeConv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->outBackprop()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+
+ return true;
+ }
+
+ bool visit(const luci::CircleFullyConnected *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->weights(), 0))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CircleAdd *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleAveragePool2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->value()));
+ return true;
+ }
+
+ bool visit(const luci::CircleLogicalOr *)
+ {
+ // Logical OR has bool-type inputs and output
+ // Nothing to be checked
+ return true;
+ }
+
+ bool visit(const luci::CircleMaxPool2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->value()));
+ return true;
+ }
+
+ bool visit(const luci::CircleMean *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleMul *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleNotEqual *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleRelu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->features()));
+ return true;
+ }
+
+ bool visit(const luci::CircleReshape *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->tensor()));
+ return true;
+ }
+
+ bool visit(const luci::CircleLogistic *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSoftmax *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->logits()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToBatchND *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToDepth *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSlice *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSplit *node)
+ {
+ // node's output is the input of CircleSplitOut, thus not quantized
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitOut *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ return true;
+ }
+
+ bool visit(const luci::CircleStridedSlice *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleArgMax *node)
+ {
+ // node's output is index, thus not quantized
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleBatchToSpaceND *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleTanh *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleTranspose *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->a()));
+ return true;
+ }
+
+ bool visit(const luci::CircleFloor *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleGreater *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleGreaterEqual *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleDiv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleFloorDiv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleRsqrt *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSqrt *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleElu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->features()));
+ return true;
+ }
+
+ bool visit(const luci::CirclePow *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleResizeBilinear *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ // TODO: Implement more Ops
+
+ bool visit(const luci::CircleNode *) { return true; }
+};
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
+
+#endif // __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__
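
For reference, the granularity rule the visitor enforces, stripped of the luci types: activations stay layer-wise (a single scale/zero-point pair), while constants such as a Conv2D filter of shape [32, 3, 3, 3] quantized along dimension 0 must carry 32 scales and 32 zero-points. The QParam struct in this sketch is a hypothetical stand-in for luci::CircleQuantParam; it is illustrative only.

#include <cstddef>
#include <cstdint>
#include <vector>

struct QParam // hypothetical stand-in for luci::CircleQuantParam
{
  std::vector<float> scale;
  std::vector<int64_t> zerop;
  int32_t quantized_dimension = 0;
};

// layer-wise (per-tensor): exactly one scale/zero-point pair
bool is_layer_wise(const QParam &qp) { return qp.scale.size() == 1 && qp.zerop.size() == 1; }

// channel-wise (per-channel): one pair per element of the quantized dimension
bool is_channel_wise(const QParam &qp, const std::vector<uint32_t> &shape, uint32_t channel_dim)
{
  const std::size_t channels = shape.at(channel_dim);
  return qp.quantized_dimension == static_cast<int32_t>(channel_dim) &&
         qp.scale.size() == channels && qp.zerop.size() == channels;
}
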
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__
+#define __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Pass/QuantizationParameters.h>
+
+using Granularity = luci::QuantizationGranularity;
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+ if (not(ARG)) \
+ { \
+ return false; \
+ }
+
+namespace luci
+{
+
+/**
+ * @brief Verify the granularity of layer-wise quantized node
+ * @details
+ *
+ * Targets to verify
+ * - node's output (i.e., node itself)
+ * - node's inputs
+ */
+struct VerifyQuantizedNodeLayerWiseGranularity final : public luci::CircleNodeVisitor<bool>
+{
+private:
+ bool is_lwq(const loco::Node *node)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+
+ if (circle_node->quantparam() == nullptr)
+ return false;
+
+ if (circle_node->quantparam()->scale.size() != 1)
+ return false;
+
+ if (circle_node->quantparam()->zerop.size() != 1)
+ return false;
+
+ return true;
+ }
+
+ bool is_lwq_const(const loco::Node *node)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleConst *>(node);
+
+ if (circle_node->quantparam() == nullptr)
+ return false;
+
+ if (circle_node->quantparam()->scale.size() != 1)
+ return false;
+
+ if (circle_node->quantparam()->zerop.size() != 1)
+ return false;
+
+ return true;
+ }
+
+private:
+ bool visit(const luci::CircleConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+
+ bool visit(const luci::CircleConcatenation *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ for (uint32_t i = 0; i < node->numValues(); i++)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->values(i)));
+ }
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthToSpace *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthwiseConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+
+ bool visit(const luci::CircleInstanceNorm *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->gamma()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->beta()))
+ return true;
+ }
+
+ bool visit(const luci::CirclePad *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ return true;
+ }
+
+ bool visit(const luci::CirclePRelu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->alpha()))
+ return true;
+ }
+
+ bool visit(const luci::CircleTransposeConv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->outBackprop()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+
+ bool visit(const luci::CircleFullyConnected *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->weights()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+
+ bool visit(const luci::CircleAdd *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleAveragePool2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->value()));
+ return true;
+ }
+
+ bool visit(const luci::CircleLogicalOr *)
+ {
+ // Logical OR has bool-type inputs and output
+ // Nothing to be checked
+ return true;
+ }
+
+ bool visit(const luci::CircleMaxPool2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->value()));
+ return true;
+ }
+
+ bool visit(const luci::CircleMean *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleMul *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleNotEqual *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleRelu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->features()));
+ return true;
+ }
+
+ bool visit(const luci::CircleReshape *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->tensor()));
+ return true;
+ }
+
+ bool visit(const luci::CircleLogistic *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSoftmax *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->logits()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToBatchND *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToDepth *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSlice *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSplit *node)
+ {
+ // node's output is the input of CircleSplitOut, thus not quantized
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitOut *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ return true;
+ }
+
+ bool visit(const luci::CircleStridedSlice *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleArgMax *node)
+ {
+ // node's output is index, thus not quantized
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleBatchToSpaceND *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleTanh *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleTranspose *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->a()));
+ return true;
+ }
+
+ bool visit(const luci::CircleFloor *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleGreater *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleGreaterEqual *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleDiv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleFloorDiv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleRsqrt *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSqrt *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleElu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->features()));
+ return true;
+ }
+
+ bool visit(const luci::CirclePow *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleResizeBilinear *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ // TODO: Implement more Ops
+
+ bool visit(const luci::CircleNode *) { return true; }
+};
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
+
+#endif // __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__
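
A granularity verifier like the one above is usually driven by walking the active nodes and letting the visitor dispatch on the node type. The sketch below assumes the usual luci CircleNode::accept() entry point and that the header above is already included, so it may differ from how the quantizer actually wires it up.

#include <luci/IR/CircleNodes.h>
#include <loco.h>

// Sketch only (assumes CircleNode::accept() for CircleNodeVisitor<bool> and that
// VerifyQuantizedNodeLayerWiseGranularity from the header above is in scope).
inline bool verify_layer_wise_granularity(loco::Graph *g)
{
  luci::VerifyQuantizedNodeLayerWiseGranularity verifier;
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    if (not circle_node->accept(&verifier))
      return false; // granularity mismatch on this node
  }
  return true;
}
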
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_NODE_S16_TYPE_H__
+#define __LUCI_VERIFY_QUANTIZED_NODE_S16_TYPE_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+using Type = loco::DataType;
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+ if (not(ARG)) \
+ { \
+ return false; \
+ }
+
+namespace luci
+{
+
+/**
+ * @brief Verify the data type of INT16 quantized node
+ * @details
+ *
+ * Targets to verify
+ * - node's output (i.e., node itself)
+ * - node's inputs
+ */
+struct VerifyQuantizedNodeS16Type final : public luci::CircleNodeVisitor<bool>
+{
+private:
+ bool has_type(const loco::Node *node, Type dtype)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ return circle_node->dtype() == dtype;
+ }
+
+private:
+ bool visit(const luci::CircleConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleConcatenation *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ for (uint32_t i = 0; i < node->numValues(); i++)
+ {
+ RETURN_FALSE_UNLESS(has_type(node->values(i), Type::S16))
+ }
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthToSpace *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthwiseConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleInstanceNorm *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->gamma(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->beta(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CirclePad *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CirclePRelu *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->alpha(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleTransposeConv *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->outBackprop(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(has_type(bias, Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleFullyConnected *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->weights(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleAdd *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleAveragePool2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->value(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleLogicalOr *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::BOOL))
+ return true;
+ }
+
+ bool visit(const luci::CircleMaxPool2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->value(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleMean *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleMul *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleNotEqual *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleRelu *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->features(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleReshape *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->tensor(), Type::S16))
+ luci::CircleConst *shape = dynamic_cast<luci::CircleConst *>(node->shape());
+ if (shape != nullptr)
+ RETURN_FALSE_UNLESS(has_type(shape, Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleLogistic *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleSoftmax *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->logits(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToBatchND *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToDepth *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleSlice *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->begin(), Type::S32) || has_type(node->begin(), Type::S64))
+ RETURN_FALSE_UNLESS(has_type(node->size(), Type::S32) || has_type(node->size(), Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleSplit *node)
+ {
+ // node's output is the input of CircleSplitOut, thus not quantized
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitOut *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleStridedSlice *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleArgMax *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, node->output_type()))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->dimension(), Type::S32) ||
+ has_type(node->dimension(), Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleBatchToSpaceND *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleTanh *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleTranspose *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->a(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->perm(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleFloor *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleGreater *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleGreaterEqual *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleDiv *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleFloorDiv *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleRsqrt *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleSqrt *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleElu *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->features(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CirclePow *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleResizeBilinear *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ // TODO: Implement more Ops
+
+ bool visit(const luci::CircleNode *) { return true; }
+};
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
+
+#endif // __LUCI_VERIFY_QUANTIZED_NODE_S16_TYPE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_NODE_U8_TYPE_H__
+#define __LUCI_VERIFY_QUANTIZED_NODE_U8_TYPE_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+using Type = loco::DataType;
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+ if (not(ARG)) \
+ { \
+ return false; \
+ }
+
+namespace luci
+{
+
+/**
+ * @brief Verify the data type of UINT8 quantized node
+ * @details
+ *
+ * Targets to verify
+ * - node's output (i.e., node itself)
+ * - node's inputs
+ */
+struct VerifyQuantizedNodeU8Type final : public luci::CircleNodeVisitor<bool>
+{
+private:
+ bool has_type(const loco::Node *node, Type dtype)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ return circle_node->dtype() == dtype;
+ }
+
+private:
+ bool visit(const luci::CircleConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleConcatenation *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ for (uint32_t i = 0; i < node->numValues(); i++)
+ {
+ RETURN_FALSE_UNLESS(has_type(node->values(i), Type::U8))
+ }
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthToSpace *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthwiseConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleInstanceNorm *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->gamma(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->beta(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CirclePad *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CirclePRelu *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->alpha(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleTransposeConv *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->outBackprop(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(has_type(bias, Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleFullyConnected *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->weights(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleAdd *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleAveragePool2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->value(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleBatchToSpaceND *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleLogicalOr *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::BOOL))
+ return true;
+ }
+
+ bool visit(const luci::CircleMaxPool2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->value(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleMean *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleMul *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleNotEqual *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleRelu *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->features(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleReshape *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->tensor(), Type::U8))
+ luci::CircleConst *shape = dynamic_cast<luci::CircleConst *>(node->shape());
+ if (shape != nullptr)
+ RETURN_FALSE_UNLESS(has_type(shape, Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleLogistic *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleSoftmax *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->logits(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToBatchND *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToDepth *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleSlice *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->begin(), Type::S32) || has_type(node->begin(), Type::S64))
+ RETURN_FALSE_UNLESS(has_type(node->size(), Type::S32) || has_type(node->size(), Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleSplit *node)
+ {
+ // node's output is the input of CircleSplitOut, thus not quantized
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitOut *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleStridedSlice *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleArgMax *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, node->output_type()))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->dimension(), Type::S32) ||
+ has_type(node->dimension(), Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleTanh *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleTranspose *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->a(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->perm(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleFloor *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleGreater *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleGreaterEqual *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleDiv *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleFloorDiv *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleRsqrt *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleSqrt *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleElu *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->features(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CirclePow *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleResizeBilinear *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ // TODO: Implement more Ops
+
+ bool visit(const luci::CircleNode *) { return true; }
+};
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
+
+#endif // __LUCI_VERIFY_QUANTIZED_NODE_U8_TYPE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InferenceCandidates.h"
+
+#include <luci/IR/DeadNodeQueryService.h>
+
+#include <algorithm>
+
+namespace luci
+{
+
+std::vector<loco::Node *> inference_candidates(loco::Graph *g)
+{
+ auto candidates = loco::postorder_traversal(loco::output_nodes(g));
+
+ for (auto node : loco::all_nodes(g))
+ {
+ // already included as candidate
+ if (std::find(candidates.begin(), candidates.end(), node) != candidates.end())
+ continue;
+
+    // A node that is used neither for a graph output nor for a multiple-output operation
+    // (i.e., a dead node) cannot be a candidate.
+ if (node->dialect()->service<DeadNodeQueryServiceImpl>()->isDeadNode(node))
+ continue;
+
+ candidates.emplace_back(node);
+ }
+
+ return candidates;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INFERENCE_CANDIDATES_H__
+#define __LUCI_INFERENCE_CANDIDATES_H__
+
+#include <loco.h>
+
+#include <vector>
+
+namespace luci
+{
+
+/**
+ * @brief Enumerate all the nodes whose shape/dtype should be inferred to export the graph.
+ */
+std::vector<loco::Node *> inference_candidates(loco::Graph *g);
+
+} // namespace luci
+
+#endif // __LUCI_INFERENCE_CANDIDATES_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InferenceCandidates.h"
+#include "luci/IR/CircleNode.h"
+
+#include <algorithm>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+bool contains(const std::vector<loco::Node *> &vec, loco::Node *val)
+{
+ return std::any_of(vec.begin(), vec.end(), [val](loco::Node *node) { return node == val; });
+}
+
+} // namespace
+
+TEST(LuciPassHelpersInferenceCandidates, inference_candidates)
+{
+ auto g = loco::make_graph();
+
+ // Create nodes
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto split = g->nodes()->create<luci::CircleSplit>();
+ auto split_out1 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_out2 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+ auto output = g->nodes()->create<luci::CircleOutput>();
+
+ // Build up initial graph
+ auto graph_input1 = g->inputs()->create();
+ input->index(graph_input1->index());
+
+ split->split_dim(split_dim);
+ split->input(input);
+ split->num_split(2);
+
+ split_out1->input(split);
+ split_out1->index(0);
+
+ split_out2->input(split);
+ split_out2->index(1);
+
+ auto graph_output = g->outputs()->create();
+ output->from(split_out1);
+ output->index(graph_output->index());
+
+ auto s = luci::inference_candidates(g.get());
+
+ ASSERT_EQ(6, s.size());
+ ASSERT_TRUE(contains(s, input));
+ ASSERT_TRUE(contains(s, split));
+ ASSERT_TRUE(contains(s, split_out1));
+ ASSERT_TRUE(contains(s, split_out2));
+ ASSERT_TRUE(contains(s, split_dim));
+ ASSERT_TRUE(contains(s, output));
+}
+
+TEST(LuciPassHelpersInferenceCandidates, inference_candidates_NEG)
+{
+ auto g = loco::make_graph();
+
+ // Create nodes
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto split = g->nodes()->create<luci::CircleSplit>();
+ auto split_out1 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_out2 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+ auto relu1 = g->nodes()->create<luci::CircleRelu>();
+ auto relu2 = g->nodes()->create<luci::CircleRelu>();
+ auto output = g->nodes()->create<luci::CircleOutput>();
+
+ // Build up initial graph
+ auto graph_input1 = g->inputs()->create();
+ input->index(graph_input1->index());
+
+ split->split_dim(split_dim);
+ split->input(input);
+ split->num_split(2);
+
+ split_out1->input(split);
+ split_out1->index(0);
+
+ split_out2->input(split);
+ split_out2->index(1);
+
+ relu1->features(split_out2);
+
+ relu2->features(input);
+
+ auto graph_output = g->outputs()->create();
+ output->from(split_out1);
+ output->index(graph_output->index());
+
+ auto s = luci::inference_candidates(g.get());
+
+ ASSERT_EQ(6, s.size());
+ ASSERT_TRUE(contains(s, input));
+ ASSERT_TRUE(contains(s, split));
+ ASSERT_TRUE(contains(s, split_out1));
+ ASSERT_TRUE(contains(s, split_out2));
+ ASSERT_TRUE(contains(s, split_dim));
+ ASSERT_TRUE(contains(s, output));
+ ASSERT_FALSE(contains(s, relu1));
+ ASSERT_FALSE(contains(s, relu2));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeFiller.h"
+
+// NOTE Do NOT delete this file; it forces the compiler to check whether 'NodeFiller.h' is
+// self-contained.
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_NODE_FILLER_H__
+#define __LUCI_PASS_HELPERS_NODE_FILLER_H__
+
+namespace luci
+{
+
+/**
+ * INTRODUCTION
+ * Binary operation f(x,y) is 'commutative' when
+ * f(x,y) == f(y,x) holds for all x, y.
+ * For example, ADD, MUL and SQUARED_DIFFERENCE are commutative.
+ * These helpers make it easy to find the arguments of a commutative node by their types.
+ *
+ * HOW TO USE
+ * COMM_NODE *node;
+ * ARG_TYPE_1 *arg1;
+ * ARG_TYPE_2 *arg2;
+ *
+ * bool ok = fill(&arg1, &arg2).with_commutative_args_of(node);
+ *
+ * Result
+ *   If 'node's commutative argument types are actually {ARG_TYPE_1, ARG_TYPE_2}
+ *   (as a set), 'arg1' and 'arg2' are set to 'node's actual arguments with the
+ *   matching types, and the return value 'ok' is true.
+ *   Otherwise, 'arg1' and 'arg2' are not changed and 'ok' is false.
+ */
+
+template <class ARG_TYPE_1, class ARG_TYPE_2> class NodeFiller final
+{
+public:
+ NodeFiller(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2) : _arg_1(arg_1), _arg_2(arg_2)
+ {
+ // DO NOTHING
+ }
+
+ /**
+   * @return true When 'node's argument types are 'ARG_TYPE_1' and 'ARG_TYPE_2'
+   *         In such case, it assigns '_arg_1' and '_arg_2' to the actual arguments
+   *
+   * @return false When 'node's argument types do NOT match 'ARG_TYPE_*'
+   *         In such case, it does not modify '_arg_1' and '_arg_2'
+ *
+ * @require COMM_NODE has member x() and y()
+ */
+ template <class COMM_NODE> bool with_commutative_args_of(const COMM_NODE *node);
+
+private:
+ ARG_TYPE_1 **_arg_1;
+ ARG_TYPE_2 **_arg_2;
+};
+
+template <class ARG_TYPE_1, class ARG_TYPE_2>
+inline NodeFiller<ARG_TYPE_1, ARG_TYPE_2> fill(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2)
+{
+ return NodeFiller<ARG_TYPE_1, ARG_TYPE_2>{arg_1, arg_2};
+}
+
+template <class ARG_TYPE_1, class ARG_TYPE_2>
+template <class COMM_NODE>
+bool NodeFiller<ARG_TYPE_1, ARG_TYPE_2>::with_commutative_args_of(const COMM_NODE *node)
+{
+ // Case 1) X == ARG_TYPE_1 / Y == ARG_TYPE_2
+ {
+ auto x = dynamic_cast<ARG_TYPE_1 *>(node->x());
+ auto y = dynamic_cast<ARG_TYPE_2 *>(node->y());
+
+ if (x && y)
+ {
+ *_arg_1 = x;
+ *_arg_2 = y;
+ return true;
+ }
+ }
+
+ // Case 2) X == ARG_TYPE_2 / Y == ARG_TYPE_1
+ {
+ auto x = dynamic_cast<ARG_TYPE_2 *>(node->x());
+ auto y = dynamic_cast<ARG_TYPE_1 *>(node->y());
+
+ if (x && y)
+ {
+ *_arg_1 = y;
+ *_arg_2 = x;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_NODE_FILLER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+#include "NodeFiller.h"
+
+TEST(NodeFillerTest, simple_test)
+{
+ luci::CircleConst maxi_const;
+ luci::CircleMinimum mini;
+ luci::CircleMaximum maxi;
+ maxi.x(&maxi_const);
+ maxi.y(&mini);
+
+ luci::CircleConst *x = nullptr;
+ luci::CircleMinimum *y = nullptr;
+
+ EXPECT_TRUE(luci::fill(&x, &y).with_commutative_args_of(&maxi));
+ EXPECT_TRUE(x == &maxi_const);
+ EXPECT_TRUE(y == &mini);
+
+ x = nullptr;
+ y = nullptr;
+
+ EXPECT_TRUE(luci::fill(&y, &x).with_commutative_args_of(&maxi));
+ EXPECT_TRUE(x == &maxi_const);
+ EXPECT_TRUE(y == &mini);
+}
+
+TEST(NodeFillerTest, wrong_condition_NEG)
+{
+ luci::CircleConst add_const;
+ luci::CircleMinimum mini;
+ luci::CircleAdd add;
+ add.x(&add_const);
+ add.y(&mini);
+
+ luci::CircleMul *x = nullptr;
+ luci::CircleMinimum *y = nullptr;
+
+ EXPECT_FALSE(luci::fill(&x, &y).with_commutative_args_of(&add));
+ EXPECT_FALSE(luci::fill(&y, &x).with_commutative_args_of(&add));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Strings.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <stdexcept>
+
+namespace luci
+{
+
+bool in_array(const std::string &str, const std::vector<std::string> &array)
+{
+ return std::find(array.begin(), array.end(), str) != array.end();
+}
+
+std::string to_string(const std::vector<std::string> &strings)
+{
+ assert(!strings.empty());
+
+ std::string res;
+ for (unsigned int i = 0; i < strings.size() - 1; i++)
+ res += strings[i] + ", ";
+
+ res += strings[strings.size() - 1];
+ return res;
+}
+
+std::string to_lower_case(std::string s)
+{
+ std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
+ return s;
+}
+
+loco::DataType str_to_dtype(const std::string &str)
+{
+ if (to_lower_case(str).compare("uint8") == 0)
+ return loco::DataType::U8;
+ if (to_lower_case(str).compare("uint16") == 0)
+ return loco::DataType::U16;
+ if (to_lower_case(str).compare("uint32") == 0)
+ return loco::DataType::U32;
+ if (to_lower_case(str).compare("uint64") == 0)
+ return loco::DataType::U64;
+
+ if (to_lower_case(str).compare("int8") == 0)
+ return loco::DataType::S8;
+ if (to_lower_case(str).compare("int16") == 0)
+ return loco::DataType::S16;
+ if (to_lower_case(str).compare("int32") == 0)
+ return loco::DataType::S32;
+ if (to_lower_case(str).compare("int64") == 0)
+ return loco::DataType::S64;
+
+ if (to_lower_case(str).compare("float16") == 0)
+ return loco::DataType::FLOAT16;
+ if (to_lower_case(str).compare("float32") == 0)
+ return loco::DataType::FLOAT32;
+ if (to_lower_case(str).compare("float64") == 0)
+ return loco::DataType::FLOAT64;
+
+ if (to_lower_case(str).compare("bool") == 0)
+ return loco::DataType::BOOL;
+
+ return loco::DataType::Unknown;
+}
+
+QuantizationGranularity str_to_granularity(const std::string &str)
+{
+ if (to_lower_case(str).compare("layer") == 0)
+ return QuantizationGranularity::LayerWise;
+
+ if (to_lower_case(str).compare("channel") == 0)
+ return QuantizationGranularity::ChannelWise;
+
+ throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'");
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_STRINGS_H__
+#define __LUCI_PASS_HELPERS_STRINGS_H__
+
+#include "luci/Pass/QuantizationParameters.h"
+
+#include <loco.h>
+
+#include <vector>
+#include <sstream>
+#include <string>
+#include <cassert>
+
+namespace luci
+{
+
+bool in_array(const std::string &, const std::vector<std::string> &);
+
+std::string to_string(const std::vector<std::string> &);
+
+std::string to_lower_case(std::string);
+
+loco::DataType str_to_dtype(const std::string &);
+
+QuantizationGranularity str_to_granularity(const std::string &);
+
+template <typename T> std::vector<T> csv_to_vector(const std::string &str)
+{
+ std::vector<T> ret;
+ std::istringstream is(str);
+ for (T i; is >> i;)
+ {
+ assert(i != ',');
+ ret.push_back(i);
+ if (is.peek() == ',')
+ is.ignore();
+ }
+ return ret;
+}
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_STRINGS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Strings.h"
+
+#include "luci/Pass/QuantizationParameters.h"
+
+#include <gtest/gtest.h>
+
+TEST(StringsTest, str_to_dtype)
+{
+ ASSERT_EQ(loco::DataType::U8, luci::str_to_dtype("uint8"));
+ ASSERT_EQ(loco::DataType::U16, luci::str_to_dtype("uint16"));
+ ASSERT_EQ(loco::DataType::U32, luci::str_to_dtype("uint32"));
+ ASSERT_EQ(loco::DataType::U64, luci::str_to_dtype("uint64"));
+
+ ASSERT_EQ(loco::DataType::S8, luci::str_to_dtype("int8"));
+ ASSERT_EQ(loco::DataType::S16, luci::str_to_dtype("int16"));
+ ASSERT_EQ(loco::DataType::S32, luci::str_to_dtype("int32"));
+ ASSERT_EQ(loco::DataType::S64, luci::str_to_dtype("int64"));
+
+ ASSERT_EQ(loco::DataType::FLOAT16, luci::str_to_dtype("float16"));
+ ASSERT_EQ(loco::DataType::FLOAT32, luci::str_to_dtype("float32"));
+ ASSERT_EQ(loco::DataType::FLOAT64, luci::str_to_dtype("float64"));
+
+ ASSERT_EQ(loco::DataType::BOOL, luci::str_to_dtype("bool"));
+
+ ASSERT_EQ(loco::DataType::Unknown, luci::str_to_dtype("foo"));
+}
+
+TEST(StringsTest, str_to_granularity)
+{
+ ASSERT_EQ(luci::QuantizationGranularity::LayerWise, luci::str_to_granularity("layer"));
+ ASSERT_EQ(luci::QuantizationGranularity::ChannelWise, luci::str_to_granularity("channel"));
+
+ EXPECT_THROW(luci::str_to_granularity("foo"), std::runtime_error);
+}
+
+TEST(StringsTest, csv_to_vector_int32)
+{
+ auto ret = luci::csv_to_vector<int32_t>("1,2,3");
+ ASSERT_EQ(3, ret.size());
+ ASSERT_EQ(1, ret.at(0));
+ ASSERT_EQ(3, ret.at(2));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TypeMapper.h"
+
+// NOTE Do NOT delete this file; it forces the compiler to check whether 'TypeMapper.h' is
+// self-contained.
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_TYPE_MAPPER_H__
+#define __LUCI_PASS_HELPERS_TYPE_MAPPER_H__
+
+#include <loco/IR/DataType.h>
+
+#include <cstdint>
+
+namespace luci
+{
+
+/**
+ * @brief TypeMapper maps between C++ primitive data types and loco::DataType.
+ */
+template <typename T> struct TypeMapper
+{
+ static constexpr loco::DataType get() { return loco::DataType::Unknown; }
+};
+
+template <> struct TypeMapper<float>
+{
+ static constexpr loco::DataType get() { return loco::DataType::FLOAT32; }
+};
+
+template <> struct TypeMapper<uint8_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::U8; }
+};
+
+template <> struct TypeMapper<uint16_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::U16; }
+};
+
+template <> struct TypeMapper<uint32_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::U32; }
+};
+
+template <> struct TypeMapper<uint64_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::U64; }
+};
+
+template <> struct TypeMapper<int8_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::S8; }
+};
+
+template <> struct TypeMapper<int16_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::S16; }
+};
+
+template <> struct TypeMapper<int32_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::S32; }
+};
+
+template <> struct TypeMapper<int64_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::S64; }
+};
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_TYPE_MAPPER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+#include "TypeMapper.h"
+
+#include <vector>
+
+namespace
+{
+
+template <typename T> bool fill_const_node(luci::CircleConst *node, std::vector<T> &data)
+{
+ if (node->dtype() != luci::TypeMapper<T>::get())
+ return false;
+
+ node->size<luci::TypeMapper<T>::get()>(data.size());
+ for (uint32_t i = 0; i < data.size(); i++)
+ {
+ node->at<luci::TypeMapper<T>::get()>(i) = data.at(i);
+ }
+
+ return true;
+}
+
+class STRANGER
+{
+};
+
+} // namespace
+
+TEST(TypeMapperTest, simple_test)
+{
+ EXPECT_EQ(loco::DataType::FLOAT32, luci::TypeMapper<float>::get());
+ EXPECT_EQ(loco::DataType::U8, luci::TypeMapper<uint8_t>::get());
+ EXPECT_EQ(loco::DataType::U16, luci::TypeMapper<uint16_t>::get());
+ EXPECT_EQ(loco::DataType::U32, luci::TypeMapper<uint32_t>::get());
+ EXPECT_EQ(loco::DataType::U64, luci::TypeMapper<uint64_t>::get());
+ EXPECT_EQ(loco::DataType::S8, luci::TypeMapper<int8_t>::get());
+ EXPECT_EQ(loco::DataType::S16, luci::TypeMapper<int16_t>::get());
+ EXPECT_EQ(loco::DataType::S32, luci::TypeMapper<int32_t>::get());
+ EXPECT_EQ(loco::DataType::S64, luci::TypeMapper<int64_t>::get());
+}
+
+TEST(TypeMapperTest, with_template_test)
+{
+ std::vector<int32_t> int32_vec{0, 1, 2, 3, 4, 5, 6, 7};
+ luci::CircleConst const_node;
+ const_node.dtype(loco::DataType::S32);
+ EXPECT_TRUE(fill_const_node(&const_node, int32_vec));
+ EXPECT_EQ(8, const_node.size<loco::DataType::S32>());
+ EXPECT_EQ(0, const_node.at<loco::DataType::S32>(0));
+ EXPECT_EQ(1, const_node.at<loco::DataType::S32>(1));
+ EXPECT_EQ(2, const_node.at<loco::DataType::S32>(2));
+ EXPECT_EQ(3, const_node.at<loco::DataType::S32>(3));
+ EXPECT_EQ(4, const_node.at<loco::DataType::S32>(4));
+ EXPECT_EQ(5, const_node.at<loco::DataType::S32>(5));
+ EXPECT_EQ(6, const_node.at<loco::DataType::S32>(6));
+ EXPECT_EQ(7, const_node.at<loco::DataType::S32>(7));
+
+ std::vector<float> f32_vec{0.0, 1.1, 2.2, 3.3, 4.4, 5.5};
+ const_node.dtype(loco::DataType::FLOAT32);
+ EXPECT_FALSE(fill_const_node(&const_node, int32_vec));
+ EXPECT_TRUE(fill_const_node(&const_node, f32_vec));
+ EXPECT_EQ(6, const_node.size<loco::DataType::FLOAT32>());
+ EXPECT_FLOAT_EQ(0.0, const_node.at<loco::DataType::FLOAT32>(0));
+ EXPECT_FLOAT_EQ(1.1, const_node.at<loco::DataType::FLOAT32>(1));
+ EXPECT_FLOAT_EQ(2.2, const_node.at<loco::DataType::FLOAT32>(2));
+ EXPECT_FLOAT_EQ(3.3, const_node.at<loco::DataType::FLOAT32>(3));
+ EXPECT_FLOAT_EQ(4.4, const_node.at<loco::DataType::FLOAT32>(4));
+ EXPECT_FLOAT_EQ(5.5, const_node.at<loco::DataType::FLOAT32>(5));
+}
+
+TEST(TypeMapperTest, wrong_condition_NEG)
+{
+ EXPECT_EQ(loco::DataType::Unknown, luci::TypeMapper<STRANGER>::get());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_TEST_FIRST_NODE_H__
+#define __LUCI_PASS_TEST_FIRST_NODE_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <loco.h>
+
+namespace luci
+{
+namespace test
+{
+
+template <class T> T *first_node(loco::Graph *g)
+{
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto target_node = dynamic_cast<T *>(node);
+ if (target_node != nullptr)
+ return target_node;
+ }
+ return nullptr;
+}
+
+} // namespace test
+} // namespace luci
+
+#endif // __LUCI_PASS_TEST_FIRST_NODE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestFirstNode.h"
+
+// This file validates "TestFirstNode.h". Please DO NOT remove this file.
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_TEST_IO_GRAPH_H__
+#define __LUCI_PASS_TEST_IO_GRAPH_H__
+
+#include "TestShape.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+namespace test
+{
+
+/**
+ * @brief Graphlet with Inputs and loco::Graph for multiple inputs
+ * @note Every Graph will have Input(s) and Output(s)
+ * We put loco::Graph only in IsGraphlet not to declare separate
+ *       We put loco::Graph only in TestIsGraphlet to avoid declaring a
+ *       separate class for loco::Graph
+template <unsigned N> class TestIsGraphlet
+{
+public:
+ TestIsGraphlet()
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_inputs[n] = nullptr;
+ _inputs[n] = nullptr;
+ }
+ }
+
+public:
+ virtual void init(loco::Graph *g, const ShapeU32 shape_in)
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_inputs[n] = g->inputs()->create();
+
+ _inputs[n] = g->nodes()->create<luci::CircleInput>();
+ _inputs[n]->shape(shape_in);
+ _inputs[n]->shape_status(luci::ShapeStatus::VALID);
+ _inputs[n]->dtype(loco::DataType::FLOAT32);
+ _inputs[n]->name("input_" + std::to_string(n));
+
+ _inputs[n]->index(_graph_inputs[n]->index());
+
+ auto input_shape = std::make_unique<loco::TensorShape>();
+ set_shape_vector(input_shape.get(), shape_in);
+ _graph_inputs[n]->shape(std::move(input_shape));
+ _graph_inputs[n]->dtype(loco::DataType::FLOAT32);
+ }
+ }
+
+public:
+ loco::Graph *g(void) { return &_g; }
+ luci::CircleInput *input(int idx) { return _inputs[idx]; }
+
+protected:
+ loco::Graph _g;
+ std::array<loco::GraphInput *, N> _graph_inputs;
+ std::array<luci::CircleInput *, N> _inputs;
+};
+
+/**
+ * @brief Graphlet with one Input
+ */
+class TestIGraphlet : public TestIsGraphlet<1>
+{
+public:
+ luci::CircleInput *input() { return _inputs[0]; }
+};
+
+/**
+ * @brief Graphlet with Outputs for multiple outputs
+ */
+template <unsigned N> class TestOsGraphlet
+{
+public:
+ TestOsGraphlet()
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_outputs[n] = nullptr;
+ _outputs[n] = nullptr;
+ }
+ }
+
+public:
+ virtual void init(loco::Graph *g, const ShapeU32 shape_out)
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_outputs[n] = g->outputs()->create();
+
+ _outputs[n] = g->nodes()->create<luci::CircleOutput>();
+ _outputs[n]->shape(shape_out);
+ _outputs[n]->shape_status(luci::ShapeStatus::VALID);
+ _outputs[n]->dtype(loco::DataType::FLOAT32);
+ _outputs[n]->name("output_" + std::to_string(n));
+
+ _outputs[n]->index(_graph_outputs[n]->index());
+
+ auto output_shape = std::make_unique<loco::TensorShape>();
+ set_shape_vector(output_shape.get(), shape_out);
+ _graph_outputs[n]->shape(std::move(output_shape));
+ _graph_outputs[n]->dtype(loco::DataType::FLOAT32);
+ }
+ }
+
+public:
+ luci::CircleOutput *output(int idx) { return _outputs[idx]; }
+
+protected:
+ std::array<loco::GraphOutput *, N> _graph_outputs;
+ std::array<luci::CircleOutput *, N> _outputs;
+};
+
+/**
+ * @brief Graphlet with one Output
+ */
+class TestOGraphlet : public TestOsGraphlet<1>
+{
+public:
+ luci::CircleOutput *output() { return _outputs[0]; }
+};
+
+/**
+ * @brief Graph with Input and Output
+ */
+class TestIOGraph : public TestIGraphlet, public TestOGraphlet
+{
+public:
+ TestIOGraph() = default;
+
+public:
+ virtual void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIsGraphlet<1>::init(g(), shape_in);
+ TestOsGraphlet<1>::init(g(), shape_out);
+ }
+};
+
+} // namespace test
+} // namespace luci
+
+#endif // __LUCI_PASS_TEST_IO_GRAPH_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestIOGraph.h"
+
+// This file validates "TestIOGraph.h". Please DO NOT remove this file.
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_TEST_SHAPE_H__
+#define __LUCI_PASS_TEST_SHAPE_H__
+
+#include <luci/IR/CircleNode.h>
+
+#include <initializer_list>
+
+namespace luci
+{
+namespace test
+{
+
+using ShapeU32 = std::initializer_list<uint32_t>;
+using ShapeI32 = std::initializer_list<int32_t>;
+
+void set_shape_vector(loco::TensorShape *shape, const ShapeU32 &values);
+void set_shape_vector(luci::CircleConst *const_node, const ShapeI32 &values);
+
+uint32_t num_elements(const ShapeU32 shape);
+
+} // namespace test
+} // namespace luci
+
+#endif // __LUCI_PASS_TEST_SHAPE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestShape.h"
+
+/**
+ * @note This file does not hold any test cases but provides methods for tests
+ */
+
+namespace luci
+{
+namespace test
+{
+
+void set_shape_vector(loco::TensorShape *shape, const ShapeU32 &values)
+{
+ uint32_t r = 0;
+ shape->rank(values.size());
+ for (auto v : values)
+ shape->dim(r++).set(v);
+}
+
+void set_shape_vector(luci::CircleConst *const_node, const ShapeI32 &values)
+{
+ const_node->rank(1);
+ const_node->dim(0).set(values.size());
+ const_node->shape_status(luci::ShapeStatus::VALID);
+ const_node->dtype(loco::DataType::S32);
+ const_node->size<loco::DataType::S32>(values.size());
+ uint32_t idx = 0;
+ for (auto val : values)
+ const_node->at<loco::DataType::S32>(idx++) = val;
+}
+
+uint32_t num_elements(const ShapeU32 shape)
+{
+ uint32_t result = 1;
+ for (auto val : shape)
+ result = result * val;
+ return result;
+}
+
+} // namespace test
+} // namespace luci
--- /dev/null
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(luci_profile SHARED ${SOURCES})
+target_include_directories(luci_profile PRIVATE src)
+target_include_directories(luci_profile PUBLIC include)
+target_link_libraries(luci_profile PUBLIC loco)
+target_link_libraries(luci_profile PUBLIC luci_lang)
+
+install(TARGETS luci_profile DESTINATION lib)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_profile_test ${TESTS})
+target_include_directories(luci_profile_test PRIVATE src)
+target_link_libraries(luci_profile_test luci_lang)
+target_link_libraries(luci_profile_test luci_profile)
--- /dev/null
+# luci-profile
+
+`luci-profile` provides profiling-related items.
+
+## CircleNodeOrigin
+
+`CircleNodeOrigin` lets us know where a node originated from.
+
+Let's assume the following graph transformations are done.
+
+```
+ | | |
+ [node1] --------+ | |
+(id = 1) | | |
+ | +--------> [node5] ----------------> [node6]
+ | | (origin = [1,2]) (origin = [1,2])
+ [node2] --------+ | |
+(id = 2) | |
+ | | |
+ [node3] -----------------> [node3] --------+-------> [node3]
+(id = 3) (origin = [3]) | (origin = [3,4])
+ | | | |
+ [node4] -----------------> [node4] --------+ |
+(id = 4) (origin = [4]) |
+ | | |
+
+<circle1> -- optimizer --> <circle2> -- quantizer --> <circle3>
+```
+
+The most important purpose of using `CircleNodeOrigin` is preserving origin information.
+The following changes show how origin information is preserved even after the graph is transformed.
+
+- `node3`
+  - `node4` is absorbed into the **existing** `node3`.
+  - The origin of `node4` is absorbed into the origin of `node3`.
+- `node5`
+  - `node1` and `node2` are fused into the **newly created** `node5`.
+  - The origins of `node1` and `node2` are inherited by the origin of `node5`.
+- `node6`
+  - `node5` is **replaced with the newly created** `node6`.
+  - The origin of `node5` is copied to the origin of `node6`.
+
+**Therefore, when using `CircleNodeOrigin`, please be aware of the most important principle: "Preserve origin information."**
+
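+As a concrete illustration, below is a minimal sketch of how a transformation could propagate origins with the `luci::get_origin`, `luci::composite_origin`, and `luci::add_origin` helpers introduced by this change. The function name `propagate_origins` and the `node*` variables are illustrative only and refer to the diagram above.
+
+```cpp
+#include <luci/Profile/CircleNodeOrigin.h>
+
+// Hypothetical fusion step: node1 and node2 have been fused into the newly
+// created node5, and node4 has been absorbed into the existing node3.
+void propagate_origins(luci::CircleNode *node1, luci::CircleNode *node2, luci::CircleNode *node3,
+                       luci::CircleNode *node4, luci::CircleNode *node5)
+{
+  // node5 inherits the union of node1's and node2's origins.
+  luci::add_origin(node5,
+                   luci::composite_origin({luci::get_origin(node1), luci::get_origin(node2)}));
+
+  // add_origin() merges node4's origin into node3's existing origin.
+  luci::add_origin(node3, luci::get_origin(node4));
+}
+```
+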
+The next items describe the implementation details used to store the origin information.
+
+### Source Table
+
+The source table contains the id and name of each origin node.
+
+#### Binary format
+
+```
+[ entry_number : uint32_t ]
+[ id : uint32_t ][ length : uint32_t ][ data : char * length ] * entry_number
+```
+- entry_number : The number of entries
+ - Each entry consists of id, length, and data.
+- id : ID of origin node
+- length : Length of data
+- data : Name of origin node **(null-terminated string)**
+
+#### In-memory format
+```cpp
+// size = entry_number
+std::map<uint32_t /* id */, std::string /* name */>
+```
+
+#### Example
+
+The following example means "The name of origin 1 is node1".
+
+```
+[Binary Format]
+ 0x01 00 00 00 0x01 00 00 00 0x06 00 00 00 0x6e 0x6f 0x64 0x65 0x31 00
+ ------------- ------------- ------------- ---- ---- ---- ---- ---- ----
+entry_number=1 id=1 length=6 'n' 'o' 'd' 'e' '1' '\0'
+```
+```cpp
+[In-memory Format]
+std::map<uint32_t, std::string>{{1, "node1"}};
+```
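+
+For reference, the sketch below shows how the binary layout above could be decoded into the in-memory format. This helper is not part of this change; the name `decode_source_table` is hypothetical, and a little-endian host is assumed, matching the byte order in the example.
+
+```cpp
+#include <cstdint>
+#include <cstring>
+#include <map>
+#include <string>
+
+std::map<uint32_t, std::string> decode_source_table(const uint8_t *buf)
+{
+  auto read_u32 = [&buf]() {
+    uint32_t v;
+    std::memcpy(&v, buf, sizeof(v)); // little-endian host assumed
+    buf += sizeof(v);
+    return v;
+  };
+
+  std::map<uint32_t, std::string> table;
+  const uint32_t entry_number = read_u32();
+  for (uint32_t i = 0; i < entry_number; ++i)
+  {
+    const uint32_t id = read_u32();
+    const uint32_t length = read_u32();
+    // 'length' counts the trailing '\0'; drop it when building the std::string value
+    table[id] = std::string(reinterpret_cast<const char *>(buf), length - 1);
+    buf += length;
+  }
+  return table;
+}
+```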
+
+### Op Table
+
+The op table maps the id of each operation to the id(s) of the operation's origin nodes.
+
+#### Binary format
+
+The op table is stored in the circle file as binary data with the following format.
+```
+[ entry_number : uint32_t ]
+[ id : uint32_t ][ node_num : uint32_t ][ node_ids : uint32_t * node_num ] * entry_number
+```
+- entry_number : The number of entries
+ - Each entry consists of id, node_num, and node_ids.
+- id : ID of operation in circle model file
+- node_num : The number of operation's origin nodes
+- node_ids : Set of IDs of origin nodes
+
+#### In-memory format
+```cpp
+std::map<uint32_t /* id */, std::set<uint32_t> /* node_ids */>
+```
+
+#### Example
+
+The following example means "Operation 5 originated from origin 1 and origin 2".
+
+```
+[Binary Format]
+ 0x01 00 00 00 0x05 00 00 00 0x02 00 00 00 0x01 00 00 00 0x02 00 00 00
+ ------------- ------------- ------------- ---------------------------
+entry_number=1 id=5 node_num=2 node_ids : 1, 2
+```
+```cpp
+[In-memory Format]
+std::map<uint32_t, std::set<uint32_t>>{{5, {1, 2}}};
+```
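+
+The same byte-reading approach applies to the op table. Again, this is only a sketch under the same assumptions (hypothetical name, little-endian host) and not part of this change.
+
+```cpp
+#include <cstdint>
+#include <cstring>
+#include <map>
+#include <set>
+
+std::map<uint32_t, std::set<uint32_t>> decode_op_table(const uint8_t *buf)
+{
+  auto read_u32 = [&buf]() {
+    uint32_t v;
+    std::memcpy(&v, buf, sizeof(v)); // little-endian host assumed
+    buf += sizeof(v);
+    return v;
+  };
+
+  std::map<uint32_t, std::set<uint32_t>> table;
+  const uint32_t entry_number = read_u32();
+  for (uint32_t i = 0; i < entry_number; ++i)
+  {
+    const uint32_t id = read_u32();
+    const uint32_t node_num = read_u32();
+    for (uint32_t n = 0; n < node_num; ++n)
+      table[id].insert(read_u32());
+  }
+  return table;
+}
+```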
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PROFILE_CIRCLE_NODE_ID_H__
+#define __LUCI_PROFILE_CIRCLE_NODE_ID_H__
+
+#include <luci/IR/CircleNode.h>
+
+namespace luci
+{
+
+using CircleNodeID = uint32_t;
+
+bool has_node_id(const luci::CircleNode *circle_node);
+
+void set_node_id(luci::CircleNode *circle_node, CircleNodeID id);
+
+CircleNodeID get_node_id(const luci::CircleNode *circle_node);
+
+} // namespace luci
+
+#endif // __LUCI_PROFILE_CIRCLE_NODE_ID_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PROFILE_CIRCLE_NODE_ORIGIN_H__
+#define __LUCI_PROFILE_CIRCLE_NODE_ORIGIN_H__
+
+#include "CircleNodeID.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <set>
+
+namespace luci
+{
+
+class CircleNodeOrigin
+{
+protected:
+ struct Source
+ {
+ public:
+ std::string name(void) const { return _name; }
+ void name(const std::string &name) { _name = name; }
+
+ uint32_t id(void) const { return _id; }
+ void id(const uint32_t id) { _id = id; }
+
+ private:
+ std::string _name;
+ uint32_t _id = 0;
+ };
+
+public:
+ virtual std::set<const Source *> sources(void) const = 0;
+};
+
+std::shared_ptr<CircleNodeOrigin> single_origin(uint32_t id, const std::string &name);
+
+std::shared_ptr<CircleNodeOrigin>
+composite_origin(const std::initializer_list<std::shared_ptr<CircleNodeOrigin>> origins);
+
+std::shared_ptr<CircleNodeOrigin>
+composite_origin(const std::vector<std::shared_ptr<CircleNodeOrigin>> &origins);
+
+} // namespace luci
+
+namespace luci
+{
+
+bool has_origin(const luci::CircleNode *circle_node);
+
+void add_origin(luci::CircleNode *circle_node, const std::shared_ptr<CircleNodeOrigin> origin);
+
+// NOTE When circle_node does not have origin, nullptr is returned
+const std::shared_ptr<luci::CircleNodeOrigin> get_origin(const luci::CircleNode *circle_node);
+
+} // namespace luci
+
+#endif // __LUCI_PROFILE_CIRCLE_NODE_ORIGIN_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Profile/CircleNodeID.h"
+
+#include <loco.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+/**
+ * @brief Set annotation for circle node id
+ * @note Once CircleNodeID is annotated, it should not be changed.
+ *       If the CircleNodeID needs to be changed, create a new CircleNodeID.
+ */
+class CircleNodeIDAnnotation final : public loco::NodeAnnotation
+{
+public:
+ CircleNodeIDAnnotation() = delete;
+
+ CircleNodeIDAnnotation(luci::CircleNodeID node_id) : _node_id{node_id}
+ {
+ // Do nothing
+ }
+
+public:
+ luci::CircleNodeID node_id(void) const { return _node_id; }
+ // No setter
+
+private:
+ luci::CircleNodeID _node_id;
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool has_node_id(const luci::CircleNode *circle_node)
+{
+ return circle_node->annot<CircleNodeIDAnnotation>() != nullptr;
+}
+
+void set_node_id(luci::CircleNode *circle_node, luci::CircleNodeID id)
+{
+ circle_node->annot<CircleNodeIDAnnotation>(nullptr);
+ circle_node->annot(std::make_unique<CircleNodeIDAnnotation>(id));
+}
+
+luci::CircleNodeID get_node_id(const luci::CircleNode *circle_node)
+{
+ if (!has_node_id(circle_node))
+ throw std::runtime_error("Cannot find CircleNodeID");
+
+ return circle_node->annot<CircleNodeIDAnnotation>()->node_id();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Profile/CircleNodeID.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+TEST(LuciCircleNodeID, simple_circle_node_id)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(has_node_id(add));
+
+ set_node_id(add, 3);
+
+ ASSERT_TRUE(has_node_id(add));
+ ASSERT_EQ(3, get_node_id(add));
+}
+
+TEST(LuciCircleNodeID, simple_circle_node_id_NEG)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(has_node_id(add));
+
+ ASSERT_ANY_THROW(get_node_id(add));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Profile/CircleNodeOrigin.h"
+
+#include <loco.h>
+
+#include <cassert>
+#include <stdexcept>
+#include <vector>
+
+namespace
+{
+
+/**
+ * @brief Set annotation for recording origin information
+ * @note Once CircleNodeOrigin is annotated, it should not be changed.
+ *       If the CircleNodeOrigin needs to be changed, create a new CircleNodeOrigin.
+ */
+class CircleNodeOriginAnnotation final : public loco::NodeAnnotation
+{
+public:
+ CircleNodeOriginAnnotation() = delete;
+
+ CircleNodeOriginAnnotation(const std::shared_ptr<luci::CircleNodeOrigin> origin) : _origin(origin)
+ {
+ // Do nothing
+ }
+
+public:
+ const std::shared_ptr<luci::CircleNodeOrigin> origin(void) const { return _origin; }
+ // No setter
+
+private:
+ const std::shared_ptr<luci::CircleNodeOrigin> _origin;
+};
+
+} // namespace
+
+namespace
+{
+
+class SingleOrigin final : public luci::CircleNodeOrigin
+{
+public:
+ SingleOrigin() = delete;
+
+ SingleOrigin(uint32_t id, const std::string &name)
+ {
+ _source.id(id);
+ _source.name(name);
+ }
+
+public:
+ std::set<const Source *> sources(void) const final
+ {
+ std::set<const Source *> res;
+ res.emplace(&_source);
+ return res;
+ }
+
+private:
+ Source _source;
+};
+
+class CompositeOrigin final : public luci::CircleNodeOrigin
+{
+public:
+ CompositeOrigin() = delete;
+
+ template <typename T> CompositeOrigin(T origins)
+ {
+ if (origins.size() == 0)
+ throw std::invalid_argument("No origins provided");
+
+ for (auto &origin : origins)
+ {
+ if (origin != nullptr)
+ _origins.emplace_back(origin);
+ }
+ }
+
+public:
+ std::set<const Source *> sources(void) const final
+ {
+ std::set<const Source *> res;
+
+ for (auto &origin : _origins)
+ {
+ for (auto source : origin->sources())
+ {
+ res.emplace(source);
+ }
+ }
+
+ return res;
+ }
+
+private:
+ std::vector<std::shared_ptr<CircleNodeOrigin>> _origins;
+};
+
+} // namespace
+
+namespace luci
+{
+
+std::shared_ptr<CircleNodeOrigin> single_origin(uint32_t id, const std::string &name)
+{
+ return std::make_shared<SingleOrigin>(id, name);
+}
+
+std::shared_ptr<CircleNodeOrigin>
+composite_origin(const std::initializer_list<std::shared_ptr<CircleNodeOrigin>> origins)
+{
+ return std::make_shared<CompositeOrigin>(origins);
+}
+
+std::shared_ptr<CircleNodeOrigin>
+composite_origin(const std::vector<std::shared_ptr<CircleNodeOrigin>> &origins)
+{
+ return std::make_shared<CompositeOrigin>(origins);
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+bool has_origin(const luci::CircleNode *circle_node)
+{
+ return circle_node->annot<CircleNodeOriginAnnotation>() != nullptr;
+}
+
+/**
+ * @brief Add 'origin' to the existing origin of circle_node.
+ * @note  If 'origin' is nullptr, nothing is changed.
+ *        For more detail, refer to the CompositeOrigin constructor.
+ */
+void add_origin(luci::CircleNode *circle_node, const std::shared_ptr<CircleNodeOrigin> origin)
+{
+ auto new_origin = composite_origin({get_origin(circle_node), origin});
+ circle_node->annot<CircleNodeOriginAnnotation>(nullptr);
+ circle_node->annot(std::make_unique<CircleNodeOriginAnnotation>(new_origin));
+}
+
+const std::shared_ptr<luci::CircleNodeOrigin> get_origin(const luci::CircleNode *circle_node)
+{
+ if (!has_origin(circle_node))
+ return nullptr;
+
+ assert(circle_node->annot<CircleNodeOriginAnnotation>()->origin() != nullptr);
+ return circle_node->annot<CircleNodeOriginAnnotation>()->origin();
+}
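+
+// Usage sketch (illustrative; 'node' is a hypothetical luci::CircleNode*;
+// see the test file below for the same pattern):
+//
+//   luci::add_origin(node, luci::single_origin(3, "add"));
+//   luci::add_origin(node, luci::single_origin(7, "sub"));
+//   auto sources = luci::get_origin(node)->sources(); // holds both sources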
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Profile/CircleNodeID.h"
+#include "luci/Profile/CircleNodeOrigin.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+TEST(LuciCircleNodeOrigin, simple_single_origin)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(has_origin(add));
+
+ auto origin = luci::single_origin(3, "add");
+ add_origin(add, origin);
+
+ ASSERT_TRUE(has_origin(add));
+
+ auto sources = get_origin(add)->sources();
+ ASSERT_EQ(1, sources.size());
+ for (auto source : sources)
+ {
+ ASSERT_EQ(3, source->id());
+ ASSERT_EQ(0, source->name().compare("add"));
+ }
+}
+
+TEST(LuciCircleNodeOrigin, simple_composite_origin_with_initializer)
+{
+ auto g = loco::make_graph();
+ auto mul = g->nodes()->create<luci::CircleMul>();
+
+ ASSERT_FALSE(has_origin(mul));
+
+ auto origin =
+ luci::composite_origin({luci::single_origin(3, "add"), luci::single_origin(7, "sub")});
+ add_origin(mul, origin);
+
+ ASSERT_TRUE(has_origin(mul));
+
+ bool add_origin_passed = false;
+ bool sub_origin_passed = false;
+ auto sources = get_origin(mul)->sources();
+ ASSERT_EQ(2, sources.size());
+ for (auto source : sources)
+ {
+ if (source->id() == 3 && source->name().compare("add") == 0)
+ add_origin_passed = true;
+ if (source->id() == 7 && source->name().compare("sub") == 0)
+ sub_origin_passed = true;
+ }
+
+ ASSERT_EQ(true, add_origin_passed);
+ ASSERT_EQ(true, sub_origin_passed);
+}
+
+TEST(LuciCircleNodeOrigin, simple_composite_origin_with_vector)
+{
+ auto g = loco::make_graph();
+ auto mul = g->nodes()->create<luci::CircleMul>();
+
+ ASSERT_FALSE(has_origin(mul));
+
+ std::vector<std::shared_ptr<luci::CircleNodeOrigin>> vec;
+ vec.push_back(luci::single_origin(3, "add"));
+ vec.push_back(luci::single_origin(7, "sub"));
+ auto origin = luci::composite_origin(vec);
+ add_origin(mul, origin);
+
+ ASSERT_TRUE(has_origin(mul));
+
+ bool add_origin_passed = false;
+ bool sub_origin_passed = false;
+ auto sources = get_origin(mul)->sources();
+ ASSERT_EQ(2, sources.size());
+ for (auto source : sources)
+ {
+ if (source->id() == 3 && source->name().compare("add") == 0)
+ add_origin_passed = true;
+ if (source->id() == 7 && source->name().compare("sub") == 0)
+ sub_origin_passed = true;
+ }
+
+ ASSERT_EQ(true, add_origin_passed);
+ ASSERT_EQ(true, sub_origin_passed);
+}
+
+TEST(LuciCircleNodeOrigin, composite_origin_empty_ctor_NEG)
+{
+ ASSERT_ANY_THROW(luci::composite_origin({}));
+}
GTest_AddTest(luci_service_test ${TESTS})
target_include_directories(luci_service_test PRIVATE src)
target_link_libraries(luci_service_test luci_service)
+target_link_libraries(luci_service_test luci_testhelper)
target_link_libraries(luci_service_test oops)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_NODE_CLONE__
+#define __LUCI_CIRCLE_NODE_CLONE__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <loco/IR/Graph.h>
+
+namespace luci
+{
+
+/**
+ * @brief Copy common attributes of CircleNode from src to dst.
+ */
+void copy_common_attributes(const luci::CircleNode *src, luci::CircleNode *dst);
+
+/**
+ * @brief Create a new CircleNode in 'graph', cloned from 'node' with the same
+ *        common attribute values.
+ * @note  Returns nullptr if cloning fails.
+ */
+CircleNode *clone_node(const CircleNode *node, loco::Graph *graph);
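+
+// Usage sketch (illustrative only; 'src_node' and 'dst_graph' are hypothetical
+// names, not part of this header):
+//
+//   luci::CircleNode *cloned = luci::clone_node(src_node, dst_graph);
+//   if (cloned == nullptr)
+//   {
+//     // cloning failed (unsupported node type or null arguments)
+//   }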
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_NODE_CLONE__
#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_H__
#define __LUCI_CIRCLE_SHAPE_INFERENCE_H__
-#include "ShapeDescription.h"
-
#include <loco/IR/Nodes.h>
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Service/CircleShapeInferenceHelper.h>
+#include <luci/Service/CircleShapeInferenceRule.h>
namespace luci
{
-/**
- * @brief Get the shape of each node as a node annotation
- *
- * HOW TO USE
- *
- * ShapeInference::get(g->nodes()->at(..));
- */
-struct ShapeInference
-{
- static ShapeDescription get(loco::Node *node);
-};
-
namespace sinf // namespace for Shape Inference
{
{
public:
  // TODO Remove this when all the visit functions are implemented
- loco::TensorShape visit(const luci::CircleNode *node) final { return sinf::circle_shape(node); }
+ loco::TensorShape visit(const luci::CircleNode *node) final
+ {
+ loco::NodeShape shape;
+ luci::CircleShapeInferenceRule().infer(node, shape);
+ return shape.as<loco::TensorShape>();
+ }
// loco::TensorShape visit(const luci::CircleAbs *node) final;
// loco::TensorShape visit(const luci::CircleAdd *node) final;
// loco::TensorShape visit(const luci::CircleEqual *node) final;
// loco::TensorShape visit(const luci::CircleExp *node) final;
// loco::TensorShape visit(const luci::CircleExpandDims *node) final;
+ // loco::TensorShape visit(const luci::CircleFakeQuant *node) final;
// loco::TensorShape visit(const luci::CircleFill *node) final;
// loco::TensorShape visit(const luci::CircleFloor *node) final;
// loco::TensorShape visit(const luci::CircleFloorDiv *node) final;
// loco::TensorShape visit(const luci::CircleMean *node) final;
// loco::TensorShape visit(const luci::CircleMinimum *node) final;
// loco::TensorShape visit(const luci::CircleMirrorPad *node) final;
+ // loco::TensorShape visit(const luci::CircleMul *node) final;
// loco::TensorShape visit(const luci::CircleNeg *node) final;
// loco::TensorShape visit(const luci::CircleNonMaxSuppressionV4 *node) final;
// loco::TensorShape visit(const luci::CircleNonMaxSuppressionV5 *node) final;
// loco::TensorShape visit(const luci::CircleNotEqual *node) final;
+ // loco::TensorShape visit(const luci::CircleOneHot *node) final;
// loco::TensorShape visit(const luci::CirclePack *node) final;
// loco::TensorShape visit(const luci::CirclePad *node) final;
// loco::TensorShape visit(const luci::CirclePadV2 *node) final;
// loco::TensorShape visit(const luci::CirclePRelu *node) final;
// loco::TensorShape visit(const luci::CircleRange *node) final;
// loco::TensorShape visit(const luci::CircleRank *node) final;
- // loco::TensorShape visit(const luci::CircleMul *node) final;
- // loco::TensorShape visit(const luci::CircleOneHot *node) final;
// loco::TensorShape visit(const luci::CircleReduceAny *node) final;
// loco::TensorShape visit(const luci::CircleReduceMax *node) final;
// loco::TensorShape visit(const luci::CircleReduceMin *node) final;
// loco::TensorShape visit(const luci::CircleInstanceNorm *node) final;
// Virtual
+ // loco::TensorShape visit(const luci::CircleCustomOut *node) final;
+ loco::TensorShape visit(const luci::CircleIfOut *node) final;
// loco::TensorShape visit(const luci::CircleInput *node) final;
+ // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final;
+ // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV5Out *node) final;
// loco::TensorShape visit(const luci::CircleOutput *node) final;
// loco::TensorShape visit(const luci::CircleOutputDummy *node) final;
// loco::TensorShape visit(const luci::CircleOutputExclude *node) final;
- // loco::TensorShape visit(const luci::CircleCustomOut *node) final;
- // loco::TensorShape visit(const luci::CircleIfOut *node) final;
- // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final;
- // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV5Out *node) final;
// loco::TensorShape visit(const luci::CircleSplitOut *node) final;
// loco::TensorShape visit(const luci::CircleSplitVOut *node) final;
// loco::TensorShape visit(const luci::CircleTopKV2Out *node) final;
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
-#define __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
-
-#include <loco/IR/TensorShape.h>
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleShapeSignature.h>
-
-namespace luci
-{
-namespace sinf // Namespace for Shape Inference
-{
-
-// Return shape of circle node as loco::TensorShape
-loco::TensorShape circle_shape(const luci::CircleNode *node);
-
-} // namespace sinf
-} // namespace luci
-
-#endif // __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__
-#define __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/IR/CircleShapeSignature.h>
-#include <luci/Service/CircleShapeSignatureInferenceHelper.h>
-
-namespace luci
-{
-
-namespace ssinf // namespace for Shape Signature Inference
-{
-
-struct Rule
-{
- bool infer(const luci::CircleNode *, ShapeSignature &) const;
-};
-
-class Algorithm final : public luci::CircleNodeVisitor<ShapeSignature>
-{
-public:
- // TODO Remove this when visit function is implemented for all the operations.
- ShapeSignature visit(const luci::CircleNode *node) final { return node->shape_signature(); }
-
- // ShapeSignature visit(const luci::CircleAbs *node) final;
- // ShapeSignature visit(const luci::CircleAdd *node) final;
- // ShapeSignature visit(const luci::CircleAddN *node) final;
- // ShapeSignature visit(const luci::CircleArgMax *node) final;
- // ShapeSignature visit(const luci::CircleArgMin *node) final;
- // ShapeSignature visit(const luci::CircleAveragePool2D *node) final;
- // ShapeSignature visit(const luci::CircleBatchMatMul *node) final;
- // ShapeSignature visit(const luci::CircleBatchToSpaceND *node) final;
- // ShapeSignature visit(const luci::CircleCast *node) final;
- // ShapeSignature visit(const luci::CircleCeil *node) final;
- // ShapeSignature visit(const luci::CircleConcatenation *node) final;
- // ShapeSignature visit(const luci::CircleConst *node) final;
- // ShapeSignature visit(const luci::CircleConv2D *node) final;
- // ShapeSignature visit(const luci::CircleCos *node) final;
- // ShapeSignature visit(const luci::CircleCustom *node) final;
- // ShapeSignature visit(const luci::CircleDepthToSpace *node) final;
- // ShapeSignature visit(const luci::CircleDepthwiseConv2D *node) final;
- // ShapeSignature visit(const luci::CircleDequantize *node) final;
- // ShapeSignature visit(const luci::CircleDiv *node) final;
- // ShapeSignature visit(const luci::CircleElu *node) final;
- // ShapeSignature visit(const luci::CircleEqual *node) final;
- // ShapeSignature visit(const luci::CircleExp *node) final;
- // ShapeSignature visit(const luci::CircleExpandDims *node) final;
- // ShapeSignature visit(const luci::CircleFill *node) final;
- // ShapeSignature visit(const luci::CircleFloor *node) final;
- // ShapeSignature visit(const luci::CircleFloorDiv *node) final;
- // ShapeSignature visit(const luci::CircleFloorMod *node) final;
- // ShapeSignature visit(const luci::CircleFullyConnected *node) final;
- // ShapeSignature visit(const luci::CircleGather *node) final;
- // ShapeSignature visit(const luci::CircleGatherNd *node) final;
- // ShapeSignature visit(const luci::CircleGreater *node) final;
- // ShapeSignature visit(const luci::CircleGreaterEqual *node) final;
- // ShapeSignature visit(const luci::CircleIf *node) final;
- // ShapeSignature visit(const luci::CircleL2Normalize *node) final;
- // ShapeSignature visit(const luci::CircleL2Pool2D *node) final;
- // ShapeSignature visit(const luci::CircleLeakyRelu *node) final;
- // ShapeSignature visit(const luci::CircleLess *node) final;
- // ShapeSignature visit(const luci::CircleLessEqual *node) final;
- // ShapeSignature visit(const luci::CircleLocalResponseNormalization *node) final;
- // ShapeSignature visit(const luci::CircleLog *node) final;
- // ShapeSignature visit(const luci::CircleLogicalAnd *node) final;
- // ShapeSignature visit(const luci::CircleLogicalNot *node) final;
- // ShapeSignature visit(const luci::CircleLogicalOr *node) final;
- // ShapeSignature visit(const luci::CircleLogistic *node) final;
- // ShapeSignature visit(const luci::CircleLogSoftmax *node) final;
- // ShapeSignature visit(const luci::CircleMatrixDiag *node) final;
- // ShapeSignature visit(const luci::CircleMatrixSetDiag *node) final;
- // ShapeSignature visit(const luci::CircleMaximum *node) final;
- // ShapeSignature visit(const luci::CircleMaxPool2D *node) final;
- ShapeSignature visit(const luci::CircleMean *node) final;
- // ShapeSignature visit(const luci::CircleMinimum *node) final;
- // ShapeSignature visit(const luci::CircleMirrorPad *node) final;
- // ShapeSignature visit(const luci::CircleNeg *node) final;
- // ShapeSignature visit(const luci::CircleNonMaxSuppressionV4 *node) final;
- // ShapeSignature visit(const luci::CircleNonMaxSuppressionV5 *node) final;
- // ShapeSignature visit(const luci::CircleNotEqual *node) final;
- // ShapeSignature visit(const luci::CirclePack *node) final;
- // ShapeSignature visit(const luci::CirclePad *node) final;
- // ShapeSignature visit(const luci::CirclePadV2 *node) final;
- // ShapeSignature visit(const luci::CirclePow *node) final;
- // ShapeSignature visit(const luci::CirclePRelu *node) final;
- // ShapeSignature visit(const luci::CircleRange *node) final;
- // ShapeSignature visit(const luci::CircleRank *node) final;
- // ShapeSignature visit(const luci::CircleMul *node) final;
- // ShapeSignature visit(const luci::CircleOneHot *node) final;
- ShapeSignature visit(const luci::CircleReduceAny *node) final;
- ShapeSignature visit(const luci::CircleReduceMax *node) final;
- ShapeSignature visit(const luci::CircleReduceMin *node) final;
- ShapeSignature visit(const luci::CircleReduceProd *node) final;
- ShapeSignature visit(const luci::CircleRelu *node) final;
- ShapeSignature visit(const luci::CircleRelu6 *node) final;
- ShapeSignature visit(const luci::CircleReluN1To1 *node) final;
- // ShapeSignature visit(const luci::CircleReshape *node) final;
- // ShapeSignature visit(const luci::CircleResizeBilinear *node) final;
- // ShapeSignature visit(const luci::CircleResizeNearestNeighbor *node) final;
- // ShapeSignature visit(const luci::CircleReverseSequence *node) final;
- // ShapeSignature visit(const luci::CircleReverseV2 *node) final;
- // ShapeSignature visit(const luci::CircleRound *node) final;
- // ShapeSignature visit(const luci::CircleRsqrt *node) final;
- // ShapeSignature visit(const luci::CircleScatterNd *node) final;
- // ShapeSignature visit(const luci::CircleSegmentSum *node) final;
- // ShapeSignature visit(const luci::CircleSelect *node) final;
- // ShapeSignature visit(const luci::CircleSelectV2 *node) final;
- // ShapeSignature visit(const luci::CircleShape *node) final;
- // ShapeSignature visit(const luci::CircleSin *node) final;
- // ShapeSignature visit(const luci::CircleSlice *node) final;
- // ShapeSignature visit(const luci::CircleSoftmax *node) final;
- // ShapeSignature visit(const luci::CircleSpaceToBatchND *node) final;
- // ShapeSignature visit(const luci::CircleSpaceToDepth *node) final;
- // ShapeSignature visit(const luci::CircleSparseToDense *node) final;
- // ShapeSignature visit(const luci::CircleSplit *node) final;
- // ShapeSignature visit(const luci::CircleSplitV *node) final;
- // ShapeSignature visit(const luci::CircleSqrt *node) final;
- // ShapeSignature visit(const luci::CircleSquare *node) final;
- // ShapeSignature visit(const luci::CircleSquaredDifference *node) final;
- // ShapeSignature visit(const luci::CircleSqueeze *node) final;
- // ShapeSignature visit(const luci::CircleStridedSlice *node) final;
- // ShapeSignature visit(const luci::CircleSub *node) final;
- ShapeSignature visit(const luci::CircleSum *node) final;
- // ShapeSignature visit(const luci::CircleTanh *node) final;
- // ShapeSignature visit(const luci::CircleTile *node) final;
- // ShapeSignature visit(const luci::CircleTopKV2 *node) final;
- // ShapeSignature visit(const luci::CircleTranspose *node) final;
- // ShapeSignature visit(const luci::CircleTransposeConv *node) final;
- // ShapeSignature visit(const luci::CircleUnidirectionalSequenceLSTM *node) final;
- // ShapeSignature visit(const luci::CircleUnique *node) final;
- // ShapeSignature visit(const luci::CircleUnpack *node) final;
- // ShapeSignature visit(const luci::CircleWhere *node) final ;
- // ShapeSignature visit(const luci::CircleWhile *node) final;
- // ShapeSignature visit(const luci::CircleZerosLike *node) final;
-
- // Circle Only
- // ShapeSignature visit(const luci::CircleBCQFullyConnected *node) final;
- // ShapeSignature visit(const luci::CircleBCQGather *node) final;
- // ShapeSignature visit(const luci::CircleInstanceNorm *node) final;
-
- // Virtual
- ShapeSignature visit(const luci::CircleInput *node) final;
- ShapeSignature visit(const luci::CircleOutput *node) final;
- ShapeSignature visit(const luci::CircleOutputDummy *node) final;
- ShapeSignature visit(const luci::CircleOutputExclude *node) final;
- // ShapeSignature visit(const luci::CircleCustomOut *node) final;
- // ShapeSignature visit(const luci::CircleIfOut *node) final;
- // ShapeSignature visit(const luci::CircleNonMaxSuppressionV4Out *node) final;
- // ShapeSignature visit(const luci::CircleNonMaxSuppressionV5Out *node) final;
- // ShapeSignature visit(const luci::CircleSplitOut *node) final;
- // ShapeSignature visit(const luci::CircleSplitVOut *node) final;
- // ShapeSignature visit(const luci::CircleTopKV2Out *node) final;
- // ShapeSignature visit(const luci::CircleUniqueOut *node) final;
- // ShapeSignature visit(const luci::CircleUnpackOut *node) final;
- // ShapeSignature visit(const luci::CircleWhileOut *node) final;
-};
-
-} // namespace ssinf
-
-} // namespace luci
-
-#endif // __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__
-#define __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleShapeSignature.h>
-
-namespace luci
-{
-
-namespace ssinf // Namespace for Shape Signature Inference
-{
-
-// Return empty signature if all of dimensions are known.
-// If at least one of dimensions is unknown, return signature without change.
-ShapeSignature legalized_signature(const luci::ShapeSignature &signature);
-
-// Return reduced input_signature with indices and keep_dims.
-// - indices : reduction index
-// - keep_dims : If true, rank is not changed. If false, rank is reduced along indices.
-ShapeSignature reduced_signature(const loco::Node *node, const loco::Node *indices, bool keep_dims);
-
-// Return signature of index-th argument of node.
-ShapeSignature input_arg_signature(const luci::CircleNode *node, uint32_t index);
-
-} // namespace ssinf
-
-} // namespace luci
-
-#endif // __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Service/CircleTypeInferenceHelper.h>
+#include <luci/Service/CircleTypeInferenceRule.h>
namespace luci
{
-/**
- * @brief Get the type of each node as NodeAnnotation
- *
- * HOW TO USE
- *
- * TypeInference::get(g->nodes()->at(0));
- * TypeInference::get(g->nodes()->at(...));
- */
-struct TypeInference
-{
- static circle::TensorType get(loco::Node *node);
-};
-
namespace tinf // namespace for Type Inference
{
{
public:
  // TODO Remove this when all the visit functions are implemented
- loco::DataType visit(const luci::CircleNode *node) final { return node->dtype(); }
+ loco::DataType visit(const luci::CircleNode *node) final
+ {
+ loco::DataType dtype;
+ luci::CircleTypeInferenceRule().infer(node, dtype);
+ return dtype;
+ }
// loco::DataType visit(const luci::CircleAbs *node) final;
// loco::DataType visit(const luci::CircleAdd *node) final;
// loco::DataType visit(const luci::CircleEqual *node) final;
// loco::DataType visit(const luci::CircleExp *node) final;
// loco::DataType visit(const luci::CircleExpandDims *node) final;
+ // loco::DataType visit(const luci::CircleFakeQuant *node) final;
// loco::DataType visit(const luci::CircleFill *node) final;
// loco::DataType visit(const luci::CircleFloor *node) final;
// loco::DataType visit(const luci::CircleFloorDiv *node) final;
// loco::DataType visit(const luci::CircleOutputDummy *node) final;
// loco::DataType visit(const luci::CircleOutputExclude *node) final;
// loco::DataType visit(const luci::CircleCustomOut *node) final;
- // loco::DataType visit(const luci::CircleIfOut *node) final;
+ loco::DataType visit(const luci::CircleIfOut *node) final;
// loco::DataType visit(const luci::CircleNonMaxSuppressionV4Out *node) final;
// loco::DataType visit(const luci::CircleNonMaxSuppressionV5Out *node) final;
// loco::DataType visit(const luci::CircleSplitOut *node) final;
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
-#define __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
-
-#include <luci/IR/CircleNodes.h>
-
-#include <loco/IR/DataType.h>
-
-namespace luci
-{
-namespace tinf // Namespace for Type Inference
-{
-
-// Helper function will be added
-
-} // namespace tinf
-} // namespace luci
-
-#endif // __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SERVICE_CIRCLE_CONST_H__
+#define __LUCI_SERVICE_CIRCLE_CONST_H__
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+namespace luci
+{
+
+/**
+ * @brief Return cloned object of CircleConst node
+ */
+luci::CircleConst *clone(luci::CircleConst *node);
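+
+// Usage sketch (illustrative; 'weights' is a hypothetical CircleConst pointer):
+//
+//   luci::CircleConst *copy = luci::clone(weights);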
+
+} // namespace luci
+
+#endif // __LUCI_SERVICE_CIRCLE_CONST_H__
// TODO remove these when CircleDialect is fully functional
ShapeDescription to_shape_description(const luci::CircleNode *node);
ShapeDescription to_shape_description(const loco::TensorShape &shape);
-ShapeDescription to_shape_description(const loco::FeatureShape &shape);
-ShapeDescription to_shape_description(const loco::FilterShape &shape);
-ShapeDescription to_shape_description(const loco::BiasShape &shape);
-ShapeDescription to_shape_description(const loco::MatrixShape &shape);
ShapeDescription to_shape_description(const loco::NodeShape &shape);
template <typename Permutation> inline bool isNHWC(Permutation *perm);
#ifndef __LUCI_SERVICE_VALIDATE_H__
#define __LUCI_SERVICE_VALIDATE_H__
+#include <luci/IR/Module.h>
+
#include <loco.h>
namespace luci
bool validate(loco::Graph *);
+/**
+ * @brief Return true if all nodes in the graph have a non-empty name
+ */
+bool validate_name(loco::Graph *);
+
+/**
+ * @brief Return true if all names in the Module are unique
+ * @note CircleOutput nodes may have duplicate names
+ */
+bool validate_unique_name(luci::Module *);
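+
+// Usage sketch (illustrative; 'graph' and 'module' are hypothetical pointers):
+//
+//   if (!luci::validate_name(graph))
+//   {
+//     // some node has an empty name
+//   }
+//   if (!luci::validate_unique_name(module))
+//   {
+//     // duplicated node names exist in the module
+//   }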
+
} // namespace luci
#endif // __LUCI_SERVICE_VALIDATE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_CLONE_NODE_H__
+#define __CIRCLE_CLONE_NODE_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+class CloneNode final : public luci::CircleNodeVisitor<luci::CircleNode *>
+{
+public:
+  CloneNode(loco::Graph *graph) : _graph(graph) {}
+
+public:
+ luci::CircleNode *visit(const luci::CircleAbs *) final;
+ luci::CircleNode *visit(const luci::CircleAdd *) final;
+ luci::CircleNode *visit(const luci::CircleAddN *) final;
+ luci::CircleNode *visit(const luci::CircleArgMax *) final;
+ luci::CircleNode *visit(const luci::CircleArgMin *) final;
+ luci::CircleNode *visit(const luci::CircleAveragePool2D *) final;
+ luci::CircleNode *visit(const luci::CircleBatchMatMul *) final;
+ luci::CircleNode *visit(const luci::CircleBatchToSpaceND *) final;
+ luci::CircleNode *visit(const luci::CircleCast *) final;
+ luci::CircleNode *visit(const luci::CircleCeil *) final;
+ luci::CircleNode *visit(const luci::CircleConcatenation *) final;
+ luci::CircleNode *visit(const luci::CircleConst *) final;
+ luci::CircleNode *visit(const luci::CircleConv2D *) final;
+ luci::CircleNode *visit(const luci::CircleCos *) final;
+ luci::CircleNode *visit(const luci::CircleCustom *) final;
+ luci::CircleNode *visit(const luci::CircleDepthToSpace *) final;
+ luci::CircleNode *visit(const luci::CircleDepthwiseConv2D *) final;
+ luci::CircleNode *visit(const luci::CircleDequantize *) final;
+ luci::CircleNode *visit(const luci::CircleDiv *) final;
+ luci::CircleNode *visit(const luci::CircleElu *) final;
+ luci::CircleNode *visit(const luci::CircleEqual *) final;
+ luci::CircleNode *visit(const luci::CircleExp *) final;
+ luci::CircleNode *visit(const luci::CircleExpandDims *) final;
+ luci::CircleNode *visit(const luci::CircleFakeQuant *) final;
+ luci::CircleNode *visit(const luci::CircleFill *) final;
+ luci::CircleNode *visit(const luci::CircleFloor *) final;
+ luci::CircleNode *visit(const luci::CircleFloorDiv *) final;
+ luci::CircleNode *visit(const luci::CircleFloorMod *) final;
+ luci::CircleNode *visit(const luci::CircleFullyConnected *) final;
+ luci::CircleNode *visit(const luci::CircleGather *) final;
+ luci::CircleNode *visit(const luci::CircleGatherNd *) final;
+ luci::CircleNode *visit(const luci::CircleGreater *) final;
+ luci::CircleNode *visit(const luci::CircleGreaterEqual *) final;
+ // luci::CircleNode *visit(const luci::CircleIf *) final;
+ luci::CircleNode *visit(const luci::CircleL2Normalize *) final;
+ luci::CircleNode *visit(const luci::CircleL2Pool2D *) final;
+ luci::CircleNode *visit(const luci::CircleLeakyRelu *) final;
+ luci::CircleNode *visit(const luci::CircleLess *) final;
+ luci::CircleNode *visit(const luci::CircleLessEqual *) final;
+ luci::CircleNode *visit(const luci::CircleLocalResponseNormalization *) final;
+ luci::CircleNode *visit(const luci::CircleLog *) final;
+ luci::CircleNode *visit(const luci::CircleLogicalAnd *) final;
+ luci::CircleNode *visit(const luci::CircleLogicalNot *) final;
+ luci::CircleNode *visit(const luci::CircleLogicalOr *) final;
+ luci::CircleNode *visit(const luci::CircleLogistic *) final;
+ luci::CircleNode *visit(const luci::CircleLogSoftmax *) final;
+ luci::CircleNode *visit(const luci::CircleMatrixDiag *) final;
+ luci::CircleNode *visit(const luci::CircleMatrixSetDiag *) final;
+ luci::CircleNode *visit(const luci::CircleMaximum *) final;
+ luci::CircleNode *visit(const luci::CircleMaxPool2D *) final;
+ luci::CircleNode *visit(const luci::CircleMean *) final;
+ luci::CircleNode *visit(const luci::CircleMinimum *) final;
+ luci::CircleNode *visit(const luci::CircleMirrorPad *) final;
+ luci::CircleNode *visit(const luci::CircleMul *) final;
+ luci::CircleNode *visit(const luci::CircleNeg *) final;
+ luci::CircleNode *visit(const luci::CircleNonMaxSuppressionV4 *) final;
+ luci::CircleNode *visit(const luci::CircleNonMaxSuppressionV5 *) final;
+ luci::CircleNode *visit(const luci::CircleNotEqual *) final;
+ luci::CircleNode *visit(const luci::CircleOneHot *) final;
+ luci::CircleNode *visit(const luci::CirclePack *) final;
+ luci::CircleNode *visit(const luci::CirclePad *) final;
+ luci::CircleNode *visit(const luci::CirclePadV2 *) final;
+ luci::CircleNode *visit(const luci::CirclePow *) final;
+ luci::CircleNode *visit(const luci::CirclePRelu *) final;
+ luci::CircleNode *visit(const luci::CircleRange *) final;
+ luci::CircleNode *visit(const luci::CircleRank *) final;
+ luci::CircleNode *visit(const luci::CircleReduceAny *) final;
+ luci::CircleNode *visit(const luci::CircleReduceMax *) final;
+ luci::CircleNode *visit(const luci::CircleReduceMin *) final;
+ luci::CircleNode *visit(const luci::CircleReduceProd *) final;
+ luci::CircleNode *visit(const luci::CircleRelu *) final;
+ luci::CircleNode *visit(const luci::CircleRelu6 *) final;
+ luci::CircleNode *visit(const luci::CircleReluN1To1 *) final;
+ luci::CircleNode *visit(const luci::CircleReshape *) final;
+ luci::CircleNode *visit(const luci::CircleResizeBilinear *) final;
+ luci::CircleNode *visit(const luci::CircleResizeNearestNeighbor *) final;
+ luci::CircleNode *visit(const luci::CircleReverseSequence *) final;
+ luci::CircleNode *visit(const luci::CircleReverseV2 *) final;
+ luci::CircleNode *visit(const luci::CircleRound *) final;
+ luci::CircleNode *visit(const luci::CircleRsqrt *) final;
+ luci::CircleNode *visit(const luci::CircleScatterNd *) final;
+ luci::CircleNode *visit(const luci::CircleSegmentSum *) final;
+ luci::CircleNode *visit(const luci::CircleSelect *) final;
+ luci::CircleNode *visit(const luci::CircleSelectV2 *) final;
+ luci::CircleNode *visit(const luci::CircleShape *) final;
+ luci::CircleNode *visit(const luci::CircleSin *) final;
+ luci::CircleNode *visit(const luci::CircleSlice *) final;
+ luci::CircleNode *visit(const luci::CircleSoftmax *) final;
+ luci::CircleNode *visit(const luci::CircleSpaceToBatchND *) final;
+ luci::CircleNode *visit(const luci::CircleSpaceToDepth *) final;
+ luci::CircleNode *visit(const luci::CircleSparseToDense *) final;
+ luci::CircleNode *visit(const luci::CircleSplit *) final;
+ luci::CircleNode *visit(const luci::CircleSplitV *) final;
+ luci::CircleNode *visit(const luci::CircleSqrt *) final;
+ luci::CircleNode *visit(const luci::CircleSquare *) final;
+ luci::CircleNode *visit(const luci::CircleSquaredDifference *) final;
+ luci::CircleNode *visit(const luci::CircleSqueeze *) final;
+ luci::CircleNode *visit(const luci::CircleStridedSlice *) final;
+ luci::CircleNode *visit(const luci::CircleSub *) final;
+ luci::CircleNode *visit(const luci::CircleSum *) final;
+ luci::CircleNode *visit(const luci::CircleTanh *) final;
+ luci::CircleNode *visit(const luci::CircleTile *) final;
+ luci::CircleNode *visit(const luci::CircleTopKV2 *) final;
+ luci::CircleNode *visit(const luci::CircleTranspose *) final;
+ luci::CircleNode *visit(const luci::CircleTransposeConv *) final;
+ luci::CircleNode *visit(const luci::CircleUnidirectionalSequenceLSTM *) final;
+ luci::CircleNode *visit(const luci::CircleUnique *) final;
+ luci::CircleNode *visit(const luci::CircleUnpack *) final;
+ luci::CircleNode *visit(const luci::CircleWhere *) final;
+ // luci::CircleNode *visit(const luci::CircleWhile *) final;
+ luci::CircleNode *visit(const luci::CircleZerosLike *) final;
+
+ // Circle Only
+ luci::CircleNode *visit(const luci::CircleBCQFullyConnected *) final;
+ luci::CircleNode *visit(const luci::CircleBCQGather *) final;
+ luci::CircleNode *visit(const luci::CircleInstanceNorm *) final;
+
+ // Virtual
+ luci::CircleNode *visit(const luci::CircleCustomOut *) final;
+ // luci::CircleNode *visit(const luci::CircleIfOut *) final;
+ // luci::CircleNode *visit(const luci::CircleInput *) final;
+ luci::CircleNode *visit(const luci::CircleNonMaxSuppressionV4Out *) final;
+ luci::CircleNode *visit(const luci::CircleNonMaxSuppressionV5Out *) final;
+ // luci::CircleNode *visit(const luci::CircleOutput *) final;
+ luci::CircleNode *visit(const luci::CircleOutputDummy *) final;
+ luci::CircleNode *visit(const luci::CircleOutputExclude *) final;
+ luci::CircleNode *visit(const luci::CircleSplitOut *) final;
+ luci::CircleNode *visit(const luci::CircleSplitVOut *) final;
+ luci::CircleNode *visit(const luci::CircleTopKV2Out *) final;
+ luci::CircleNode *visit(const luci::CircleUniqueOut *) final;
+ luci::CircleNode *visit(const luci::CircleUnpackOut *) final;
+ // luci::CircleNode *visit(const luci::CircleWhileOut *) final;
+
+ // NOTE CircleNodeVisitor will throw if not supported here
+
+protected:
+ loco::Graph *_graph = nullptr;
+};
+
+} // namespace luci
+
+#endif // __CIRCLE_CLONE_NODE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include "CircleCloneNode.h"
+
+#include <oops/UserExn.h>
+
+#include <cassert>
+#include <memory>
+
+namespace luci
+{
+
+/**
+ * @note Node-type-specific attributes, such as keep_dims() of CircleSum,
+ *       are not copied.
+ */
+void copy_common_attributes(const luci::CircleNode *src, luci::CircleNode *dst)
+{
+ assert(src != nullptr);
+ assert(dst != nullptr);
+
+ dst->name(src->name());
+ dst->dtype(src->dtype());
+
+ dst->rank(src->rank());
+ for (uint32_t i = 0; i < src->rank(); i++)
+ {
+ dst->dim(i) = src->dim(i);
+ }
+ dst->shape_status(src->shape_status());
+
+ // quantparam
+ const auto *quantparam = src->quantparam();
+ if (quantparam != nullptr)
+ {
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale = quantparam->scale;
+ qparam->zerop = quantparam->zerop;
+ qparam->min = quantparam->min;
+ qparam->max = quantparam->max;
+ qparam->quantized_dimension = quantparam->quantized_dimension;
+
+ dst->quantparam(std::move(qparam));
+ }
+
+ // sparsity
+ const auto *sparsity = src->sparsityparam();
+ if (sparsity != nullptr)
+ {
+ auto sparam = std::make_unique<luci::SparsityParam>();
+ sparam->traversal_order = sparsity->traversal_order;
+ sparam->block_map = sparsity->block_map;
+ sparam->dim_metadata = sparsity->dim_metadata;
+
+ dst->sparsityparam(std::move(sparam));
+ }
+
+ // op version
+ dst->op_version(src->op_version());
+}
+
+/**
+ * @note Each visit implementation must copy the node-specific attributes
+ *       (see the usage sketch after clone_node below).
+ */
+luci::CircleNode *clone_node(const luci::CircleNode *node, loco::Graph *graph)
+{
+ if (node == nullptr || graph == nullptr)
+ return nullptr;
+
+ CloneNode cn(graph);
+ auto cloned = node->accept(&cn);
+ if (cloned != nullptr)
+ copy_common_attributes(node, cloned);
+ return cloned;
+}
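+
+// Illustrative sketch of what a single CloneNode::visit() is expected to look
+// like; this is not code from this change, only an example of the note above
+// (the node-specific attribute here is the fused activation of ADD):
+//
+//   luci::CircleNode *CloneNode::visit(const luci::CircleAdd *node)
+//   {
+//     auto *cloned = _graph->nodes()->create<luci::CircleAdd>();
+//     cloned->fusedActivationFunction(node->fusedActivationFunction());
+//     return cloned;
+//   }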
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+// NOTE any node will do for testing
+#include <luci/IR/Nodes/CircleAdd.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+luci::CircleAdd *build_simple_add_graph(loco::Graph *g)
+{
+ auto node = g->nodes()->create<luci::CircleAdd>();
+
+ node->name("name");
+ node->dtype(loco::DataType::FLOAT32);
+ node->rank(1);
+ node->dim(0).set(3);
+ node->shape_status(luci::ShapeStatus::VALID);
+ node->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale = {1.0};
+ qparam->zerop = {0};
+ qparam->min = {0.0};
+ qparam->max = {1.0};
+ qparam->quantized_dimension = 0;
+ node->quantparam(std::move(qparam));
+
+ auto sparam = std::make_unique<luci::SparsityParam>();
+ sparam->traversal_order = {0};
+ sparam->block_map = {0};
+ sparam->dim_metadata = {luci::DimMetaData(luci::DimensionType::DENSE, 1)};
+ node->sparsityparam(std::move(sparam));
+
+ node->op_version(2);
+
+ return node;
+}
+
+} // namespace
+
+TEST(CircleNodeCloneTest, copy_attributes)
+{
+ auto g = loco::make_graph();
+ auto node = build_simple_add_graph(g.get());
+
+ auto copy = g->nodes()->create<luci::CircleAdd>();
+ luci::copy_common_attributes(node, copy);
+
+ ASSERT_EQ(node->name(), copy->name());
+ ASSERT_EQ(node->dtype(), copy->dtype());
+ ASSERT_EQ(node->rank(), copy->rank());
+ ASSERT_EQ(node->shape_status(), copy->shape_status());
+
+ const auto *qparam_node = node->quantparam();
+ const auto *qparam_copy = copy->quantparam();
+ ASSERT_EQ(qparam_node->scale, qparam_copy->scale);
+
+ const auto *sparsity_node = node->sparsityparam();
+ const auto *sparsity_copy = copy->sparsityparam();
+ ASSERT_EQ(sparsity_node->traversal_order, sparsity_copy->traversal_order);
+
+ ASSERT_EQ(node->op_version(), copy->op_version());
+}
+
+TEST(CircleNodeCloneTest, clone_add_node)
+{
+ auto g = loco::make_graph();
+ auto node = build_simple_add_graph(g.get());
+
+ auto cg = loco::make_graph();
+ auto clone = clone_node(node, cg.get());
+
+ ASSERT_NE(nullptr, clone);
+ ASSERT_EQ(cg.get(), clone->graph());
+ ASSERT_EQ(node->name(), clone->name());
+ ASSERT_EQ(node->dtype(), clone->dtype());
+ ASSERT_EQ(node->rank(), clone->rank());
+ ASSERT_EQ(node->shape_status(), clone->shape_status());
+}
+
+TEST(CircleNodeCloneTest, clone_node_NEG)
+{
+ auto g = loco::make_graph();
+ auto node = build_simple_add_graph(g.get());
+
+ auto cg = loco::make_graph();
+ auto clone = luci::clone_node(nullptr, cg.get());
+ ASSERT_EQ(nullptr, clone);
+ auto clone2 = luci::clone_node(node, nullptr);
+ ASSERT_EQ(nullptr, clone2);
+}
*/
#include "luci/Service/CircleShapeInference.h"
-#include "luci/Service/ShapeDescription.h"
+
+#include "CircleShapeInferenceHelper.h"
#include <loco.h>
-#include <loco/Service/ShapeInference.h>
#include <luci/Log.h>
#include <cassert>
#include <iostream>
-namespace luci
-{
-
-ShapeDescription ShapeInference::get(loco::Node *node)
-{
- assert(loco::shape_known(node));
- return to_shape_description(loco::shape_get(node));
-}
-
-} // namespace luci
-
namespace
{
{
if (r)
os << ",";
- os << tensor_shape.dim(r).value();
+
+ if (tensor_shape.dim(r).known())
+ os << tensor_shape.dim(r).value();
+ else
+ os << "?";
}
os << "]";
return os;
return true;
}
-} // namespace ssinf
+} // namespace sinf
} // namespace luci
* limitations under the License.
*/
-#include "luci/Service/CircleShapeInferenceHelper.h"
+#include "CircleShapeInferenceHelper.h"
+
+namespace luci
+{
+
+loco::NodeShape shape_get(const loco::Node *node)
+{
+ assert(luci::shape_known(node));
+ return loco::NodeShape{sinf::circle_shape(loco::must_cast<const luci::CircleNode *>(node))};
+}
+
+bool shape_known(const loco::Node *node)
+{
+ return loco::must_cast<const luci::CircleNode *>(node)->shape_status() !=
+ luci::ShapeStatus::UNDEFINED;
+}
+
+} // namespace luci
namespace luci
{
loco::TensorShape shape;
shape.rank(node->rank());
for (uint32_t r = 0; r < node->rank(); ++r)
- shape.dim(r) = loco::Dimension(node->dim(r).value());
+ shape.dim(r) = node->dim(r);
return shape;
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
+#define __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
+
+#include <loco/IR/NodeShape.h>
+#include <loco/IR/TensorShape.h>
+
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+
+// NOTE Functions in this namespace will be removed after new inference
+// algorithms are fully implemented.
+
+// Temporary helper for deprecating loco::shape_get
+loco::NodeShape shape_get(const loco::Node *node);
+
+// Temporary helper for deprecating loco::shape_known
+bool shape_known(const loco::Node *node);
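+
+// Typical use inside the shape inference code in this directory, e.g.:
+//
+//   auto x_shape = luci::shape_get(node->x()).as<loco::TensorShape>();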
+
+} // namespace luci
+
+namespace luci
+{
+namespace sinf // Namespace for Shape Inference
+{
+
+// Return shape of circle node as loco::TensorShape
+loco::TensorShape circle_shape(const luci::CircleNode *node);
+
+} // namespace sinf
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
#include "luci/Service/CircleShapeInferenceRule.h"
#include "Check.h"
+#include "CircleShapeInferenceHelper.h"
#include "ShapeInfer_StridedSlice.h"
#include <luci/IR/CircleNodes.h>
{
if (r)
os << ",";
- os << tensor_shape.dim(r).value();
+
+ if (tensor_shape.dim(r).known())
+ os << tensor_shape.dim(r).value();
+ else
+ os << "?";
}
os << "]";
return os;
loco::TensorShape shape;
shape.rank(node->rank());
for (uint32_t r = 0; r < node->rank(); ++r)
- shape.dim(r) = loco::Dimension(node->dim(r).value());
+ {
+    // The shape inference rules in this file do not handle unknown dimensions.
+    // If a node has an unknown dimension, 0 used to be inserted and the
+    // inferred shape became wrong as a result.
+    // A new shape inference algorithm is being implemented to fix this.
+    // Until it is fully implemented, an unknown dimension is represented
+    // as 1, following the TFLite convention.
+ shape.dim(r) = node->dim(r).known() ? node->dim(r).value() : 1;
+ }
return shape;
}
output_shape.rank(rank);
for (uint32_t axis = 0; axis < rank; ++axis)
{
- assert(x.dim(axis).known() && y.dim(axis).known());
-
- auto x_dim = x.dim(axis).value();
- auto y_dim = y.dim(axis).value();
+ auto x_dim = x.dim(axis).known() ? x.dim(axis).value() : 1;
+ auto y_dim = y.dim(axis).known() ? y.dim(axis).value() : 1;
// each dimension of x and y should be same or one must be 1 if different
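    // e.g. broadcasting x = [2, 1, 5] with y = [2, 3, 1] yields [2, 3, 5]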
if (!((x_dim == y_dim) || (x_dim == 1 || y_dim == 1)))
template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node)
{
- auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>();
- auto y_shape = loco::shape_get(node->y()).template as<loco::TensorShape>();
+ auto x_shape = luci::shape_get(node->x()).template as<loco::TensorShape>();
+ auto y_shape = luci::shape_get(node->y()).template as<loco::TensorShape>();
auto output_shape = broadcast_shape(x_shape, y_shape);
return loco::NodeShape{output_shape};
}
+template <class CIRCLENODE> loco::NodeShape use_inputs(const CIRCLENODE *node)
+{
+ auto inputs_shape = luci::shape_get(node->inputs()).template as<loco::TensorShape>();
+ return loco::NodeShape{inputs_shape};
+}
+
template <class CIRCLENODE> loco::NodeShape use_x(const CIRCLENODE *node)
{
- auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>();
+ auto x_shape = luci::shape_get(node->x()).template as<loco::TensorShape>();
return loco::NodeShape{x_shape};
}
template <class CIRCLENODE> loco::NodeShape use_logits(const CIRCLENODE *node)
{
- auto shape = loco::shape_get(node->logits()).template as<loco::TensorShape>();
+ auto shape = luci::shape_get(node->logits()).template as<loco::TensorShape>();
return loco::NodeShape{shape};
}
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(node->input()).template as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).template as<loco::TensorShape>();
// TODO support other data type
LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
loco::NodeShape infer_add_n(const luci::CircleAddN *node)
{
- auto shape = loco::shape_get(node->inputs(0)).as<loco::TensorShape>();
+ auto shape = luci::shape_get(node->inputs(0)).as<loco::TensorShape>();
for (uint32_t idx = 1; idx < node->arity(); ++idx)
{
- auto shape_idx = loco::shape_get(node->inputs(idx)).as<loco::TensorShape>();
+ auto shape_idx = luci::shape_get(node->inputs(idx)).as<loco::TensorShape>();
if (!(shape == shape_idx))
{
INTERNAL_EXN_V("ADD_N shape not same as the first input: ", idx);
loco::NodeShape infer_arg_max(const luci::CircleArgMax *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto dimension_shape = luci::shape_get(node->dimension()).as<loco::TensorShape>();
int64_t select_axis = 0;
{
loco::NodeShape infer_arg_min(const luci::CircleArgMin *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto dimension_shape = luci::shape_get(node->dimension()).as<loco::TensorShape>();
int64_t select_axis = 0;
{
// Call this for CircleAvgPool2D and CircleMaxPool2D only
template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node)
{
- LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known");
-
- auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>();
+ auto ifm_shape = luci::shape_get(node->value()).template as<loco::TensorShape>();
assert(ifm_shape.rank() == 4);
uint32_t input_height = ifm_shape.dim(1).value();
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
// Support only input rank is 3 and 4
assert(input_shape.rank() == 3 || input_shape.rank() == 4);
auto const_crops = loco::must_cast<luci::CircleConst *>(node->crops());
LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops");
- auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
- auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>();
+ auto const_block_shape_shape = luci::shape_get(const_block_shape).as<loco::TensorShape>();
+ auto const_crops_shape = luci::shape_get(const_crops).as<loco::TensorShape>();
assert(const_block_shape_shape.rank() == 1);
assert(const_crops_shape.rank() == 2);
template <class Conv2DType> OutputSize infer_conv2d_type(const Conv2DType *node)
{
- auto ifm_shape = loco::shape_get(node->input()).template as<loco::TensorShape>();
- auto ker_shape = loco::shape_get(node->filter()).template as<loco::TensorShape>();
+ auto ifm_shape = luci::shape_get(node->input()).template as<loco::TensorShape>();
+ auto ker_shape = luci::shape_get(node->filter()).template as<loco::TensorShape>();
assert(ifm_shape.rank() == 4);
assert(ker_shape.rank() == 4);
loco::Dimension y_lhs = adj_y ? y_shape.dim(y_rank - 1) : y_shape.dim(y_rank - 2);
loco::Dimension y_rhs = adj_y ? y_shape.dim(y_rank - 2) : y_shape.dim(y_rank - 1);
- if (not(x_rhs == y_lhs))
+ if (x_rhs.known() && y_lhs.known() && not(x_rhs == y_lhs))
INTERNAL_EXN("x_rhs and y_lhs should be same");
uint32_t out_rank = output_shape.rank();
// TODO Support when CircleConcatenation has 0 input
assert(node->numValues() > 0);
- auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+ auto first_shape = luci::shape_get(node->values(0)).as<loco::TensorShape>();
auto axis = node->axis();
if (axis < 0)
axis += first_shape.rank();
for (uint32_t i = 1; i < node->numValues(); ++i)
{
- auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->values(i)).as<loco::TensorShape>();
for (uint32_t j = 0; j < output_shape.rank(); ++j)
{
if (j == static_cast<uint32_t>(axis))
+ {
+        // If the dimension is unknown, value() returns 0, which is wrong.
+        // To keep compatibility, this code is left as is until the new
+        // inference algorithm is implemented.
output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value();
+ }
else
- assert(output_shape.dim(j) == input_shape.dim(j));
+ assert(!output_shape.dim(j).known() || !input_shape.dim(j).known() ||
+ output_shape.dim(j) == input_shape.dim(j));
}
}
{
LOGGER(l);
- auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
- auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
+ auto ifm_shape = luci::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
+ auto ker_shape = luci::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker(" << ker_shape.rank()
<< ")" << std::endl;
loco::NodeShape infer_depth_to_space(const luci::CircleDepthToSpace *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
// Only data format NHWC is supported
loco::NodeShape infer_depthwise_conv2d(const luci::CircleDepthwiseConv2D *node)
{
- auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
- auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
+ auto ifm_shape = luci::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
+ auto ker_shape = luci::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
assert(ifm_shape.rank() == 4);
assert(ker_shape.rank() == 4);
assert(ker_shape.dim(0).value() == 1);
+ assert(ifm_shape.dim(3).value() * node->depthMultiplier() == ker_shape.dim(3).value());
auto os = infer_conv2d_type(node);
loco::NodeShape infer_expand_dims(const luci::CircleExpandDims *node)
{
const loco::DataType S32 = loco::DataType::S32;
- auto x_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto x_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
if (x_shape.rank() == 0)
{
// This may be for an unknown shape. We use the shape from the node itself.
}
int32_t axis = const_axis->at<S32>(0);
LUCI_ASSERT((axis <= static_cast<int32_t>(x_shape.rank())) &&
- (axis >= -1 - static_cast<int32_t>(x_shape.rank())),
+ (axis >= -1 - static_cast<int32_t>(x_shape.rank())),
"Axis has to be between [-(D+1), D], where D is rank of input.");
size_t positive_axis = axis < 0 ? x_shape.rank() + axis + 1 : axis;
loco::TensorShape output_shape;
loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto weights_shape = luci::shape_get(node->weights()).as<loco::TensorShape>();
// Checking shape capability for fully connected layer
// Input: a tensor of at least rank 2 [D1, D2, ... Dn]
{
loco::TensorShape output_shape;
- const auto input_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
- const auto positions_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->params()).as<loco::TensorShape>();
+ const auto positions_shape = luci::shape_get(node->indices()).as<loco::TensorShape>();
int32_t axis = node->axis();
// If CircleGather input has a dynamic shape, it can't infer this shape. So, it returns the
{
loco::TensorShape output_shape;
- const auto params_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
- const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ const auto params_shape = luci::shape_get(node->params()).as<loco::TensorShape>();
+ const auto indices_shape = luci::shape_get(node->indices()).as<loco::TensorShape>();
const auto params_rank = params_shape.rank();
const auto indices_rank = indices_shape.rank();
{
loco::TensorShape output_shape;
- auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+ auto diagonal_shape = luci::shape_get(node->diagonal()).as<loco::TensorShape>();
auto rank = diagonal_shape.rank();
output_shape.rank(rank + 1);
loco::NodeShape infer_matrix_set_diag(const luci::CircleMatrixSetDiag *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto diagonal_shape = luci::shape_get(node->diagonal()).as<loco::TensorShape>();
auto rank = diagonal_shape.rank();
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(input).as<loco::TensorShape>();
auto reduction_indices = loco::must_cast<const luci::CircleConst *>(indices);
{ // Exceptions
loco::NodeShape infer_one_hot(const luci::CircleOneHot *node)
{
const loco::DataType S32 = loco::DataType::S32;
- auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ auto indices_shape = luci::shape_get(node->indices()).as<loco::TensorShape>();
// Only support the case where OneHot node's depth() is CircleConst with type S32
// TODO support depth with other types
auto depth = loco::must_cast<luci::CircleConst *>(node->depth());
{
LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs");
- auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+ auto first_shape = luci::shape_get(node->values(0)).as<loco::TensorShape>();
// Make sure all inputs have the same shape.
for (uint32_t i = 1; i < node->values_count(); ++i)
{
- auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+ auto in_shape = luci::shape_get(node->values(i)).as<loco::TensorShape>();
LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape},
"All inputs must have the same shape");
}
loco::NodeShape infer_p_relu(const luci::CirclePRelu *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto alpha_shape = loco::shape_get(node->alpha()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto alpha_shape = luci::shape_get(node->alpha()).as<loco::TensorShape>();
auto output_shape = broadcast_shape(input_shape, alpha_shape);
loco::TensorShape output_shape = shape_by_input;
// One of the dimensions can have special value -1, meaning its actual value should be inferred.
- const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
- const uint32_t input_element_count = loco::element_count(&input_shape);
+ const auto input_shape = luci::shape_get(node->tensor()).as<loco::TensorShape>();
+ uint32_t input_element_count = 1;
uint32_t output_element_count = 1;
uint32_t unknown_dim_index = UINT32_MAX;
+ for (uint32_t i = 0; i < input_shape.rank(); ++i)
+ input_element_count *= (input_shape.dim(i).known() ? input_shape.dim(i).value() : 1);
for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index)
{
const uint32_t dim_value = output_shape.dim(dim_index).value();
loco::NodeShape infer_resize_bilinear(const luci::CircleResizeBilinear *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
if (input_shape.rank() != 4)
INTERNAL_EXN("Expected ResizeBilinear input to have rank 4");
loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNeighbor *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
if (input_shape.rank() != 4)
INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4");
loco::NodeShape infer_segment_sum(const luci::CircleSegmentSum *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto segment_shape = loco::shape_get(node->segment_ids()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto segment_shape = luci::shape_get(node->segment_ids()).as<loco::TensorShape>();
LUCI_ASSERT(segment_shape.rank() == 1, "segment_ids must be 1-D tensor");
LUCI_ASSERT(segment_shape.dim(0).value() == input_shape.dim(0).value(),
loco::NodeShape infer_select(const luci::CircleSelect *node)
{
- auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
- assert(t_shape == loco::shape_get(node->e()).as<loco::TensorShape>());
+ auto t_shape = luci::shape_get(node->t()).as<loco::TensorShape>();
+ assert(t_shape == luci::shape_get(node->e()).as<loco::TensorShape>());
// condition shape validation
- auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
+ auto c_shape = luci::shape_get(node->condition()).as<loco::TensorShape>();
if (c_shape.rank() != t_shape.rank())
{
if (c_shape.rank() != 0 && c_shape.rank() != 1)
loco::NodeShape infer_select_v2(const luci::CircleSelectV2 *node)
{
- auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
- auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
- auto e_shape = loco::shape_get(node->e()).as<loco::TensorShape>();
+ auto c_shape = luci::shape_get(node->condition()).as<loco::TensorShape>();
+ auto t_shape = luci::shape_get(node->t()).as<loco::TensorShape>();
+ auto e_shape = luci::shape_get(node->e()).as<loco::TensorShape>();
// validate ability to broadcast shapes to each other
auto b_shape = broadcast_shape(broadcast_shape(c_shape, t_shape), e_shape);
loco::NodeShape infer_shape(const luci::CircleShape *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
loco::TensorShape output_shape;
const loco::DataType S32 = loco::DataType::S32;
const loco::DataType S64 = loco::DataType::S64;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
auto const_begin = loco::must_cast<luci::CircleConst *>(node->begin());
auto const_size = loco::must_cast<luci::CircleConst *>(node->size());
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
// Only input rank 3 or 4 is supported
assert(input_shape.rank() == 3 || input_shape.rank() == 4);
auto const_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
LUCI_ASSERT(const_paddings->dtype() == S32, "Only support int32 paddings");
- auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
- auto const_paddings_shape = loco::shape_get(const_paddings).as<loco::TensorShape>();
+ auto const_block_shape_shape = luci::shape_get(const_block_shape).as<loco::TensorShape>();
+ auto const_paddings_shape = luci::shape_get(const_paddings).as<loco::TensorShape>();
assert(const_block_shape_shape.rank() == 1);
assert(const_paddings_shape.rank() == 2);
loco::NodeShape infer_space_to_depth(const luci::CircleSpaceToDepth *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
// Only data format NHWC is supported
auto output_shape_node = dynamic_cast<luci::CircleConst *>(node->output_shape());
if (output_shape_node != nullptr)
{
- // Only support node with S32
- LUCI_ASSERT(output_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst");
+ const auto output_shape_type = output_shape_node->dtype();
if (output_shape_node->rank() != 1)
INTERNAL_EXN_V("Only support rank 1 CircleConst",
oops::to_uint32(output_shape_node->rank()));
- shape.rank(output_shape_node->size<loco::DataType::S32>());
+ if (output_shape_type == loco::DataType::S32)
+ {
+ shape.rank(output_shape_node->size<loco::DataType::S32>());
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
+ }
+ }
+ else if (output_shape_type == loco::DataType::S64)
{
- shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
+ shape.rank(output_shape_node->size<loco::DataType::S64>());
+
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ shape.dim(axis) = output_shape_node->at<loco::DataType::S64>(axis);
+ }
+ }
+ else
+ {
+ INTERNAL_EXN("Output shape of SparseToDense must be either int32 or int64");
}
}
else
loco::NodeShape infer_squeeze(const luci::CircleSqueeze *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
// TODO input shape may be unknown before runtime
std::vector<bool> do_squeeze(input_shape.rank(), false);
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
auto multiples = loco::must_cast<luci::CircleConst *>(node->multiples());
// TODO support non-const case
loco::NodeShape infer_transpose(const luci::CircleTranspose *node)
{
- auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->a()).as<loco::TensorShape>();
auto perm_node = loco::must_cast<luci::CircleConst *>(node->perm());
// CircleUnpack provides a list (array) of Tensors, each having one less dimension than the input
// We'll set the shape of CircleUnpack to the shape of the actual outputs
// TODO fix this if any problem arises
- auto value_shape = loco::shape_get(node->value()).as<loco::TensorShape>();
+ auto value_shape = luci::shape_get(node->value()).as<loco::TensorShape>();
auto axis = node->axis();
auto num = node->num();
loco::NodeShape infer_unidirectionalsequencelstm(const luci::CircleUnidirectionalSequenceLSTM *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
auto recurrent_to_output_weights =
- loco::shape_get(node->recurrent_to_output_weights()).as<loco::TensorShape>();
+ luci::shape_get(node->recurrent_to_output_weights()).as<loco::TensorShape>();
auto rank = input_shape.rank();
loco::TensorShape output_shape;
output_shape.rank(rank);
loco::NodeShape infer_unique(const luci::CircleUnique *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
assert(input_shape.rank() == 1);
{
loco::TensorShape out_shape;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->weights_clusters());
LUCI_ASSERT(input_shape.rank() == 2, "Input rank of BCQFullyConnected should be 2");
loco::TensorShape input_shape;
loco::TensorShape output_shape;
- const auto input_binary_shape = loco::shape_get(node->input_binary()).as<loco::TensorShape>();
- const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ const auto input_binary_shape = luci::shape_get(node->input_binary()).as<loco::TensorShape>();
+ const auto indices_shape = luci::shape_get(node->indices()).as<loco::TensorShape>();
auto axis = node->axis();
auto input_clusters = loco::must_cast<luci::CircleConst *>(node->input_clusters());
return loco::NodeShape{*output_shape};
}
-loco::NodeShape infer_if_out(const luci::CircleIfOut *node)
-{
- /**
- * @note IF operator type and shape are that of the "then" and "else"
- * Graph Outputs.
- */
- auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
- if (circle_if == nullptr)
- {
- INTERNAL_EXN("CircleIf IR is not configured correctly");
- }
-
- auto index = node->index();
- auto then_graph = circle_if->then_graph();
- auto else_graph = circle_if->else_graph();
- assert(then_graph != nullptr);
- assert(else_graph != nullptr);
-
- // shape and type are assumed to be same
- // these are checked at post_import_graph() in Import
- auto then_outputs = loco::output_nodes(then_graph);
- auto else_outputs = loco::output_nodes(else_graph);
- assert(then_outputs.size() == else_outputs.size());
- assert(index < static_cast<int32_t>(then_outputs.size()));
-
- auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
- auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
-
- auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
- auto else_graph_outputs = else_graph->outputs();
- assert(then_graph_outputs->size() == else_graph_outputs->size());
-
- auto then_graph_output = then_graph_outputs->at(then_out->index());
- auto else_graph_output = else_graph_outputs->at(else_out->index());
- (void)else_graph_output; // make compiler happy for unused variable warnings
- assert(*then_graph_output->shape() == *else_graph_output->shape());
-
- return loco::NodeShape{*then_graph_output->shape()};
-}
-
loco::NodeShape infer_non_max_suppression_v4_out(const luci::CircleNonMaxSuppressionV4Out *node)
{
const loco::DataType S32 = loco::DataType::S32;
loco::NodeShape unknown;
- auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
+ auto split_shape = luci::shape_get(split).as<loco::TensorShape>();
auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
if (split_dim == nullptr)
loco::NodeShape unknown;
- auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
+ auto split_shape = luci::shape_get(split).as<loco::TensorShape>();
auto size_splits = dynamic_cast<const luci::CircleConst *>(split->size_splits());
if (size_splits == nullptr)
INTERNAL_EXN("CircleSplit IR is not configured correctly");
// shape of topkv2 is same as topkv2->input()
- auto input_shape = loco::shape_get(topkv2).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(topkv2).as<loco::TensorShape>();
auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k());
LUCI_ASSERT(node_k->dtype() == S32, "Only support Int32");
}
assert(node->index() == 1);
auto unique = loco::must_cast<luci::CircleUnique *>(node->input());
- auto unique_shape = loco::shape_get(unique->input()).as<loco::TensorShape>();
+ auto unique_shape = luci::shape_get(unique->input()).as<loco::TensorShape>();
assert(unique_shape.rank() == 1);
INTERNAL_EXN("CircleUnpack IR is not configured correctly");
}
- auto unpack_shape = loco::shape_get(unpack).as<loco::TensorShape>();
+ auto unpack_shape = luci::shape_get(unpack).as<loco::TensorShape>();
return loco::NodeShape{unpack_shape};
}
loco::NodeShape visit(const luci::CircleBatchMatMul *node) final
{
- auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
- auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+ auto x_shape = luci::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = luci::shape_get(node->y()).as<loco::TensorShape>();
return infer_batchmatmul_shape(x_shape, y_shape, node->adj_x(), node->adj_y());
}
loco::NodeShape visit(const luci::CircleDequantize *node) final
{
- const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleElu *node) final
{
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
return infer_expand_dims(node);
}
+ loco::NodeShape visit(const luci::CircleFakeQuant *node) final { return use_inputs(node); }
+
loco::NodeShape visit(const luci::CircleFill *node) final { return infer_fill(node); }
loco::NodeShape visit(const luci::CircleFloor *node) final { return use_x(node); }
{
// Shape of CircleIf is not used. Just use input 0
assert(node->input_count() > 0);
- const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->input(0)).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleLeakyRelu *node) final
{
- const auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleLocalResponseNormalization *node) final
{
- const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final
{
- const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+ const auto boxes_shape = luci::shape_get(node->boxes()).as<loco::TensorShape>();
return loco::NodeShape{boxes_shape};
}
loco::NodeShape visit(const luci::CircleNonMaxSuppressionV5 *node) final
{
- const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+ const auto boxes_shape = luci::shape_get(node->boxes()).as<loco::TensorShape>();
return loco::NodeShape{boxes_shape};
}
loco::NodeShape visit(const luci::CircleRelu *node) final
{
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleRelu6 *node) final
{
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleReluN1To1 *node) final
{
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleReverseSequence *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleReverseV2 *node) final
{
- auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->tensor()).as<loco::TensorShape>();
- LUCI_ASSERT(loco::shape_get(node->axis()).as<loco::TensorShape>().rank() == 1,
+ LUCI_ASSERT(luci::shape_get(node->axis()).as<loco::TensorShape>().rank() == 1,
"Tensor must be 1-D");
return loco::NodeShape{input_shape};
loco::NodeShape visit(const luci::CircleSplit *node) final
{
// We'll set Split output the same as input so that SplitOut can handle its own shape
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleSplitV *node) final
{
// We'll set SplitV output the same as input so that SplitOut can handle its own shape
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleTopKV2 *node) final
{
// Set the shape of this node to be the same as its input
- const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
{
// Shape of CircleWhile is not used. Just use input 0
assert(node->arity() > 0);
- const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->input(0)).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleZerosLike *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleInstanceNorm *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleCustomOut *node) final { return use_own(node); }
- loco::NodeShape visit(const luci::CircleIfOut *node) final { return infer_if_out(node); }
-
loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final
{
return infer_non_max_suppression_v4_out(node);
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TestGraph.h"
-#include "luci/Service/CircleShapeInferenceRule.h"
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleDialect.h>
-
-#include <loco.h>
-#include <loco/IR/CanonicalDialect.h>
-#include <loco/Service/ShapeInference.h>
-#include <loco/Service/CanonicalShapeInferenceRule.h>
-#include <loco/Service/MultiDialectShapeInferenceRule.h>
-
-#include <oops/InternalExn.h>
-
-#include <gtest/gtest.h>
-
-#include <memory>
-
-namespace
-{
-
-bool shape_pass(loco::Graph *g)
-{
- loco::CanonicalShapeInferenceRule canonical_rule;
- luci::CircleShapeInferenceRule circle_rule;
- loco::MultiDialectShapeInferenceRule rules;
-
- rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(luci::CircleDialect::get(), &circle_rule);
-
- return loco::apply(&rules).to(g);
-}
-
-} // namespace
-
-TEST(CircleShapeInferenceRuleTest, minimal_with_CircleRelu)
-{
- // Create a simple network
- luci::test::TestGraph graph;
- auto relu_node = graph.append<luci::CircleRelu>(graph.input_node);
- graph.complete(relu_node);
-
- // set shape
- {
- graph.input_node->rank(2);
- graph.input_node->dim(0) = 3;
- graph.input_node->dim(1) = 4;
-
- graph.output_node->rank(2);
- graph.output_node->dim(0) = 3;
- graph.output_node->dim(1) = 4;
-
- luci::test::graph_input_shape(graph.input_node);
- luci::test::graph_output_shape(graph.output_node);
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(relu_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(relu_node));
- ASSERT_EQ(loco::Domain::Tensor, loco::shape_get(relu_node).domain());
-
- auto shape = loco::shape_get(relu_node).as<loco::TensorShape>();
- ASSERT_EQ(2, shape.rank());
- ASSERT_EQ(3, shape.dim(0));
- ASSERT_EQ(4, shape.dim(1));
- }
-}
-
-// based on the case shown in
-// https://www.corvil.com/kb/what-is-the-difference-between-same-and-valid-padding-in-tf-nn-max-pool-of-tensorflow
-TEST(CircleShapeInferenceRuleTest, avgpool2d_valid)
-{
- luci::test::TestGraph graph;
- auto avg_node = graph.append<luci::CircleAveragePool2D>(graph.input_node);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({1, 4, 3, 1});
- luci::test::graph_input_shape(input_node);
- }
- auto output_node = graph.output_node;
- {
- output_node->shape({1, 2, 1, 1});
- luci::test::graph_output_shape(output_node);
- }
- // setting CircleAveragePool2D
- {
- avg_node->filter()->h(2);
- avg_node->filter()->w(2);
- avg_node->stride()->h(2);
- avg_node->stride()->w(2);
- avg_node->fusedActivationFunction(luci::FusedActFunc::NONE);
- avg_node->padding(luci::Padding::VALID);
- }
- ASSERT_FALSE(loco::shape_known(avg_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(avg_node));
- ASSERT_EQ(loco::Domain::Tensor, loco::shape_get(avg_node).domain());
-
- auto shape = loco::shape_get(avg_node).as<loco::TensorShape>();
- ASSERT_EQ(4, shape.rank());
- ASSERT_EQ(1, shape.dim(0).value());
- ASSERT_EQ(2, shape.dim(1).value());
- ASSERT_EQ(1, shape.dim(2).value());
- ASSERT_EQ(1, shape.dim(3).value());
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, avgpool2d_same)
-{
- luci::test::TestGraph graph;
- auto avg_node = graph.append<luci::CircleAveragePool2D>(graph.input_node);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({1, 4, 3, 1});
- luci::test::graph_input_shape(input_node);
- }
- auto output_node = graph.output_node;
- {
- output_node->shape({1, 2, 2, 1});
- luci::test::graph_output_shape(output_node);
- }
-
- // setting CircleAveragePool2D
- {
- avg_node->filter()->h(2);
- avg_node->filter()->w(2);
- avg_node->stride()->h(2);
- avg_node->stride()->w(2);
- avg_node->fusedActivationFunction(luci::FusedActFunc::NONE);
- avg_node->padding(luci::Padding::SAME);
- }
-
- ASSERT_FALSE(loco::shape_known(avg_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(avg_node));
- ASSERT_EQ(loco::Domain::Tensor, loco::shape_get(avg_node).domain());
-
- auto shape = loco::shape_get(avg_node).as<loco::TensorShape>();
- ASSERT_EQ(4, shape.rank());
- ASSERT_EQ(1, shape.dim(0).value());
- ASSERT_EQ(2, shape.dim(1).value());
- ASSERT_EQ(2, shape.dim(2).value());
- ASSERT_EQ(1, shape.dim(3).value());
- }
-}
-
-/**
- * @note Function to test: Shape inference of two different input shapes
- *
- * Rank expansion to higher input side
- * x(2,1,5) + y(3,5) --> x(2,1,5) + y(1,3,5)
- * Do output shape inference like numpy
- * x(2,1,5) + y(1,3,5) --> output(2,3,5)
- * For each axis, dim value should be same OR one of them should be 1
- */
-TEST(CircleShapeInferenceRuleTest, TFAdd_shapeinf_different)
-{
- auto g = loco::make_graph();
-
- auto x_node = g->nodes()->create<luci::CircleInput>();
- {
- x_node->rank(3);
- x_node->dim(0) = 2;
- x_node->dim(1) = 1;
- x_node->dim(2) = 5;
- }
- auto y_node = g->nodes()->create<luci::CircleInput>();
- {
- y_node->rank(2);
- y_node->dim(0) = 3;
- y_node->dim(1) = 5;
- }
- auto add_node = g->nodes()->create<luci::CircleAdd>();
- {
- add_node->x(x_node);
- add_node->y(y_node);
- }
- auto output_node = g->nodes()->create<luci::CircleOutput>();
- {
- output_node->from(add_node);
- }
-
- auto x_input = g->inputs()->create();
- {
- x_input->name("x");
- luci::link(x_input, x_node);
- }
- auto y_input = g->inputs()->create();
- {
- y_input->name("y");
- luci::link(y_input, y_node);
- }
- auto output = g->outputs()->create();
- {
- output->name("output");
- luci::link(output, output_node);
- }
-
- luci::test::graph_input_shape(x_node);
- luci::test::graph_input_shape(y_node);
- luci::test::graph_output_shape(output_node);
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(add_node));
-
- // shape inference
- while (shape_pass(g.get()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(add_node));
- ASSERT_EQ(loco::Domain::Tensor, loco::shape_get(add_node).domain());
-
- auto shape = loco::shape_get(add_node).as<loco::TensorShape>();
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(2, shape.dim(0));
- ASSERT_EQ(3, shape.dim(1));
- ASSERT_EQ(5, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleTranspose_simple)
-{
- luci::test::ExampleGraph<luci::test::ExampleGraphType::CircleTranspose> g;
-
- g.input_node->rank(3);
- g.input_node->dim(0) = 3;
- g.input_node->dim(1) = 8;
- g.input_node->dim(2) = 1;
-
- g.const_perm->dtype(loco::DataType::S32);
- g.const_perm->rank(1);
- g.const_perm->dim(0) = 3;
- g.const_perm->size<loco::DataType::S32>(3);
- g.const_perm->at<loco::DataType::S32>(0) = 1;
- g.const_perm->at<loco::DataType::S32>(1) = 2;
- g.const_perm->at<loco::DataType::S32>(2) = 0;
-
- luci::test::graph_input_shape(g.input_node);
- luci::test::graph_output_shape(g.output_node);
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(g.transpose_node));
-
- // shape inference
- while (shape_pass(g.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(g.transpose_node));
-
- auto shape = loco::shape_get(g.transpose_node).as<loco::TensorShape>();
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(8, shape.dim(0));
- ASSERT_EQ(1, shape.dim(1));
- ASSERT_EQ(3, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleSqueeze)
-{
- luci::test::TestGraph graph;
- auto squeeze_node = graph.append<luci::CircleSqueeze>(graph.input_node);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({1, 4, 3, 1});
- }
- auto output_node = graph.output_node;
- {
- output_node->shape({4, 3, 1});
- }
-
- luci::test::graph_input_shape(input_node);
- luci::test::graph_output_shape(output_node);
-
- squeeze_node->squeeze_dims({0});
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(squeeze_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(squeeze_node));
-
- auto shape = loco::shape_get(squeeze_node).as<loco::TensorShape>();
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(4, shape.dim(0));
- ASSERT_EQ(3, shape.dim(1));
- ASSERT_EQ(1, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleExpandDims)
-{
- luci::test::TestGraph graph;
- auto axis = graph.append<luci::CircleConst>();
- axis->dtype(loco::DataType::S32);
- axis->rank(0);
- axis->size<loco::DataType::S32>(1);
- axis->at<loco::DataType::S32>(0) = 1;
-
- auto expand_dims = graph.append<luci::CircleExpandDims>(graph.input_node, axis);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({4, 3});
- }
-
- auto output_node = graph.output_node;
- {
- output_node->from(expand_dims);
- }
-
- luci::test::graph_input_shape(input_node);
- luci::test::graph_output_shape(output_node);
-
- // shape inference
- while (shape_pass(graph.graph()))
- ;
-
- // validation
- {
- ASSERT_TRUE(loco::shape_known(expand_dims));
-
- auto shape = loco::shape_get(expand_dims).as<loco::TensorShape>();
-
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(4, shape.dim(0));
- ASSERT_EQ(1, shape.dim(1));
- ASSERT_EQ(3, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleSqueezeAll)
-{
- luci::test::TestGraph graph;
- auto squeeze_node = graph.append<luci::CircleSqueeze>(graph.input_node);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({1, 4, 3, 1});
- }
- auto output_node = graph.output_node;
- {
- input_node->shape({4, 3});
- }
-
- luci::test::graph_input_shape(input_node);
- luci::test::graph_output_shape(output_node);
-
- squeeze_node->squeeze_dims({});
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(squeeze_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(squeeze_node));
-
- auto shape = loco::shape_get(squeeze_node).as<loco::TensorShape>();
- ASSERT_EQ(2, shape.rank());
- ASSERT_EQ(4, shape.dim(0));
- ASSERT_EQ(3, shape.dim(1));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleGatherNd_simple)
-{
- luci::test::TestGraph graph;
- auto indices_const = graph.append<luci::CircleConst>();
- auto gather_nd_node = graph.append<luci::CircleGatherNd>(graph.input_node, indices_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- auto output_node = graph.output_node;
- output_node->shape({1, 2, 2, 3});
- luci::test::graph_output_shape(output_node);
- }
-
- {
- indices_const->shape({1, 2, 3});
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(gather_nd_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(gather_nd_node));
-
- auto shape = loco::shape_get(gather_nd_node).as<loco::TensorShape>();
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(1, shape.dim(0));
- ASSERT_EQ(2, shape.dim(1));
- ASSERT_EQ(3, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleGatherNd_slices)
-{
- luci::test::TestGraph graph;
- auto indices_const = graph.append<luci::CircleConst>();
- auto gather_nd_node = graph.append<luci::CircleGatherNd>(graph.input_node, indices_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- auto output_node = graph.output_node;
- output_node->shape({1, 2, 4, 4, 3});
- luci::test::graph_output_shape(output_node);
- }
-
- {
- indices_const->shape({1, 2, 1});
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(gather_nd_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(gather_nd_node));
-
- auto shape = loco::shape_get(gather_nd_node).as<loco::TensorShape>();
- ASSERT_EQ(5, shape.rank());
- ASSERT_EQ(1, shape.dim(0));
- ASSERT_EQ(2, shape.dim(1));
- ASSERT_EQ(4, shape.dim(2));
- ASSERT_EQ(4, shape.dim(3));
- ASSERT_EQ(3, shape.dim(4));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleGatherNd_NEG)
-{
- luci::test::TestGraph graph;
- auto indices_const = graph.append<luci::CircleConst>();
- auto gather_nd_node = graph.append<luci::CircleGatherNd>(graph.input_node, indices_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- // Does not matter, because test should fail anyway
- auto output_node = graph.output_node;
- output_node->shape({0, 0, 0});
- luci::test::graph_output_shape(output_node);
- }
-
- {
- indices_const->shape({1, 2, 5});
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(gather_nd_node));
-
- // had to pack into lambda to check throw
- auto lambda = [&]() {
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
- };
-
- ASSERT_THROW(lambda(), oops::InternalExn);
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleResizeNearestNeighbor)
-{
- luci::test::TestGraph graph;
- auto size_const = graph.append<luci::CircleConst>();
- size_const->dtype(loco::DataType::S32);
- size_const->rank(1);
- size_const->dim(0) = 2;
- size_const->size<loco::DataType::S32>(2);
- size_const->at<loco::DataType::S32>(0) = 16;
- size_const->at<loco::DataType::S32>(1) = 16;
- auto resize_node = graph.append<luci::CircleResizeNearestNeighbor>(graph.input_node, size_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- auto output_node = graph.output_node;
- output_node->from(resize_node);
- luci::test::graph_output_shape(output_node);
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(resize_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(resize_node));
-
- auto shape = loco::shape_get(resize_node).as<loco::TensorShape>();
- ASSERT_EQ(4, shape.rank());
- ASSERT_EQ(1, shape.dim(0));
- ASSERT_EQ(16, shape.dim(1));
- ASSERT_EQ(16, shape.dim(2));
- ASSERT_EQ(3, shape.dim(3));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleResizeBilinear)
-{
- luci::test::TestGraph graph;
- auto size_const = graph.append<luci::CircleConst>();
- size_const->dtype(loco::DataType::S32);
- size_const->rank(1);
- size_const->dim(0) = 2;
- size_const->size<loco::DataType::S32>(2);
- size_const->at<loco::DataType::S32>(0) = 16;
- size_const->at<loco::DataType::S32>(1) = 16;
- auto resize_node = graph.append<luci::CircleResizeBilinear>(graph.input_node, size_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- auto output_node = graph.output_node;
- output_node->from(resize_node);
- luci::test::graph_output_shape(output_node);
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(resize_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(resize_node));
-
- auto shape = loco::shape_get(resize_node).as<loco::TensorShape>();
- ASSERT_EQ(4, shape.rank());
- ASSERT_EQ(1, shape.dim(0));
- ASSERT_EQ(16, shape.dim(1));
- ASSERT_EQ(16, shape.dim(2));
- ASSERT_EQ(3, shape.dim(3));
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Service/CircleShapeSignatureInference.h"
-
-#include <luci/Log.h>
-
-namespace
-{
-
-std::ostream &operator<<(std::ostream &os, const luci::ShapeSignature &shape_signature)
-{
- os << "[";
- for (uint32_t r = 0; r < shape_signature.rank(); ++r)
- {
- if (r)
- os << ",";
- os << shape_signature.dim(r);
- }
- os << "]";
- return os;
-}
-
-} // namespace
-
-namespace luci
-{
-
-namespace ssinf
-{
-
-bool Rule::infer(const luci::CircleNode *circle_node, ShapeSignature &shape_signature) const
-{
- LOGGER(l);
-
- // There is nothing to check before ShapeSignatureInference.
-
- Algorithm alg;
-
- shape_signature = circle_node->accept(&alg);
-
- VERBOSE(l, 1) << "[luci] Shape Signature( " << circle_node->name() << " )";
- VERBOSE(l, 1) << " before: " << circle_node->shape_signature();
- VERBOSE(l, 1) << " after: " << shape_signature;
-
- return true;
-}
-
-} // namespace ssinf
-
-} // namespace luci
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Service/CircleShapeSignatureInferenceHelper.h"
-
-#include <loco.h>
-
-#include <luci/Log.h>
-
-#include <oops/InternalExn.h>
-
-namespace luci
-{
-
-namespace ssinf
-{
-
-luci::ShapeSignature legalized_signature(const luci::ShapeSignature &signature)
-{
- // If shape signature has at least one -1, it is not static.
- for (uint32_t i = 0; i < signature.rank(); ++i)
- if (signature.dim(i) == -1)
- return signature;
-
- // If all dimensions are static, return empty shape signature.
- return luci::ShapeSignature();
-}
-
-ShapeSignature reduced_signature(const loco::Node *node, const loco::Node *indices, bool keep_dims)
-{
- LOGGER(l);
-
- ShapeSignature input_signature;
- ShapeSignature output_signature;
-
- auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
- if (circle_node->shape_signature().rank() > 0)
- input_signature = circle_node->shape_signature();
- else
- {
- input_signature.rank(circle_node->rank());
- for (uint32_t i = 0; i < circle_node->rank(); ++i)
- input_signature.dim(i) = circle_node->dim(i).value();
- }
-
- // If input rank is 0, it means that one of following case is occurred.
- // - Input is scalar : result is always scalar
- // - Input shape signature is not inferenced : cannot infer output shape signauture
- // Therefore, when input signature rank is 0, always return empty signature.
- if (input_signature.rank() == 0)
- return output_signature;
-
- // When reduction_indices is not constant
- auto reduction_indices = dynamic_cast<const luci::CircleConst *>(indices);
- if (reduction_indices == nullptr)
- {
- if (keep_dims)
- {
- // If keep_dims is true, rank is not changed.
- output_signature.rank(input_signature.rank());
- for (uint32_t i = 0; i < output_signature.rank(); ++i)
- output_signature.dim(i) = -1;
- }
- else
- {
- // There is no way to inference for this case.
- // Do nothing to return empty signature.
- INFO(l) << "[CircleShapeSignatureInferenceHelper] " << circle_node->name() << std::endl;
- INFO(l) << " reduced_signature : cannot infer because of non-constant node" << std::endl;
- }
-
- return output_signature;
- }
-
- std::vector<int32_t> reduction_values;
- if (reduction_indices->dtype() == loco::DataType::S32)
- {
- auto reduction_size = reduction_indices->size<loco::DataType::S32>();
- for (uint32_t i = 0; i < reduction_size; ++i)
- {
- int32_t axis = reduction_indices->at<loco::DataType::S32>(i);
- if (axis < 0)
- axis += input_signature.rank();
-
- if (!(0 <= axis && axis < static_cast<int32_t>(input_signature.rank())))
- INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis));
-
- reduction_values.push_back(axis);
- }
- }
- else if (reduction_indices->dtype() == loco::DataType::S64)
- {
- auto reduction_size = reduction_indices->size<loco::DataType::S64>();
- for (uint32_t i = 0; i < reduction_size; ++i)
- {
- int32_t axis = static_cast<int32_t>(reduction_indices->at<loco::DataType::S64>(i));
- if (axis < 0)
- axis += input_signature.rank();
-
- if (!(0 <= axis && axis < static_cast<int32_t>(input_signature.rank())))
- INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis));
-
- reduction_values.push_back(axis);
- }
- }
- else
- {
- INTERNAL_EXN("Wrong reduction axis type, Only INT32, INT64 supported.");
- }
-
- if (keep_dims)
- {
- output_signature.rank(input_signature.rank());
- for (uint32_t i = 0; i < input_signature.rank(); ++i)
- output_signature.dim(i) = input_signature.dim(i);
- for (uint32_t i = 0; i < reduction_values.size(); ++i)
- output_signature.dim(reduction_values.at(i)) = 1;
- }
- else
- {
- std::vector<bool> check_reduce(input_signature.rank(), false);
- for (uint32_t i = 0; i < reduction_values.size(); ++i)
- check_reduce.at(reduction_values.at(i)) = true;
-
- uint32_t reduce_cnt = 0;
- for (uint32_t i = 0; i < check_reduce.size(); ++i)
- if (check_reduce.at(i))
- ++reduce_cnt;
-
- output_signature.rank(input_signature.rank() - reduce_cnt);
- for (uint32_t i = 0, j = 0; i < check_reduce.size(); ++i)
- if (check_reduce.at(i) == false)
- output_signature.dim(j++) = input_signature.dim(i);
- }
-
- return output_signature;
-}
-
-ShapeSignature input_arg_signature(const luci::CircleNode *node, uint32_t index)
-{
- auto circle_input = loco::must_cast<luci::CircleNode *>(node->arg(index));
- return circle_input->shape_signature();
-}
-
-} // namespace ssinf
-
-} // namespace luci
*/
#include "luci/Service/CircleTypeInference.h"
+#include "CircleTypeInferenceHelper.h"
#include <luci/Log.h>
#include <loco.h>
-#include <loco/Service/TypeInference.h>
-
-#include <mio/circle/schema_generated.h>
-#include <oops/InternalExn.h>
#include <type_traits>
namespace
{
-circle::TensorType translateLocoTypeToCircle(loco::DataType dtype)
-{
- switch (dtype)
- {
- case loco::DataType::U8:
- return circle::TensorType_UINT8;
- // case loco::DataType::U16: unsupported
- // case loco::DataType::U32: unsupported
- // case loco::DataType::U64: unsupported
- case loco::DataType::S8:
- return circle::TensorType_INT8;
- case loco::DataType::S16:
- return circle::TensorType_INT16;
- case loco::DataType::S32:
- return circle::TensorType_INT32;
- case loco::DataType::S64:
- return circle::TensorType_INT64;
- case loco::DataType::FLOAT16:
- return circle::TensorType_FLOAT16;
- case loco::DataType::FLOAT32:
- return circle::TensorType_FLOAT32;
- // case loco::DataType::FLOAT64: unsupported
- case loco::DataType::BOOL:
- return circle::TensorType_BOOL;
- default:
- break;
- }
-
- INTERNAL_EXN_V("Invalid loco dtype", oops::to_uint32(dtype));
-}
-
-} // namespace
-
-namespace luci
-{
-
-circle::TensorType TypeInference::get(loco::Node *node)
-{
- assert(loco::dtype_known(node));
- return translateLocoTypeToCircle(loco::dtype_get(node));
-}
-
-} // namespace luci
-
-namespace
-{
-
bool inputs_dtype_ready(const luci::CircleNode *node)
{
for (uint32_t arity = 0; arity < node->arity(); ++arity)
{
- if (node->dtype() == loco::DataType::Unknown)
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->arg(arity));
+ if (input_node->dtype() == loco::DataType::Unknown)
return false;
}
* limitations under the License.
*/
-#include "luci/Service/CircleTypeInferenceHelper.h"
+#include "CircleTypeInferenceHelper.h"
+
+namespace luci
+{
+
+loco::DataType dtype_get(const loco::Node *node)
+{
+ assert(luci::dtype_known(node));
+ return loco::must_cast<const luci::CircleNode *>(node)->dtype();
+}
+
+bool dtype_known(const loco::Node *node)
+{
+ return loco::must_cast<const luci::CircleNode *>(node)->dtype() != loco::DataType::Unknown;
+}
+
+} // namespace luci
namespace luci
{
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
+#define __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <loco/IR/DataType.h>
+
+namespace luci
+{
+
+// NOTE Functions in this namespace will be removed after new inference
+// algorithms are fully implemented.
+
+// This is a temporary function for deprecating loco::dtype_get
+loco::DataType dtype_get(const loco::Node *node);
+
+// This is a temporary function for deprecating loco::dtype_known
+bool dtype_known(const loco::Node *node);
+
+} // namespace luci
+
+namespace luci
+{
+namespace tinf // Namespace for Type Inference
+{
+
+// Helper function will be added
+
+} // namespace tinf
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
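A minimal usage sketch of the helpers declared above (illustrative only; dtype_or_unknown is a hypothetical wrapper, not part of the patch): luci::dtype_get() asserts that the dtype is already resolved, so unsure callers can guard with luci::dtype_known() first.

#include "CircleTypeInferenceHelper.h" // declares luci::dtype_get / luci::dtype_known

// Hypothetical convenience wrapper: returns the resolved dtype of a node, or
// loco::DataType::Unknown when type inference has not reached it yet, instead
// of tripping the assert inside luci::dtype_get().
inline loco::DataType dtype_or_unknown(const loco::Node *node)
{
  return luci::dtype_known(node) ? luci::dtype_get(node) : loco::DataType::Unknown;
}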
*/
#include "luci/Service/CircleTypeInferenceRule.h"
+#include "CircleTypeInferenceHelper.h"
#include <luci/IR/CircleDialect.h>
#include <luci/IR/CircleNodeVisitor.h>
{
// TODO Given a tensor x of complex numbers, Abs operation returns a tensor of type float32 or
// float64.
- loco::DataType visit(const luci::CircleAbs *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleAbs *node) final { return luci::dtype_get(node->x()); }
- loco::DataType visit(const luci::CircleAdd *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleAdd *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleAddN *node) final
{
- auto dtype = loco::dtype_get(node->inputs(0));
+ auto dtype = luci::dtype_get(node->inputs(0));
for (uint32_t idx = 1; idx < node->arity(); ++idx)
{
- auto dtype_idx = loco::dtype_get(node->inputs(idx));
+ auto dtype_idx = luci::dtype_get(node->inputs(idx));
if (dtype != dtype_idx)
{
INTERNAL_EXN_V("ADD_N dtype not same as the first input: ", idx);
}
}
- return loco::dtype_get(node->inputs(0));
+ return luci::dtype_get(node->inputs(0));
}
loco::DataType visit(const luci::CircleArgMax *node) final { return node->output_type(); }
loco::DataType visit(const luci::CircleAveragePool2D *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
loco::DataType visit(const luci::CircleBatchMatMul *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleBatchToSpaceND *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleCast *node) final { return node->dtype(); }
- loco::DataType visit(const luci::CircleCeil *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleCeil *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleConcatenation *node) final
{
assert(node->numValues() > 0);
for (uint32_t i = 1; i < node->numValues(); ++i)
- assert(loco::dtype_get(node->values(i - 1)) == loco::dtype_get(node->values(i)));
+ assert(luci::dtype_get(node->values(i - 1)) == luci::dtype_get(node->values(i)));
- return loco::dtype_get(node->values(0));
+ return luci::dtype_get(node->values(0));
}
loco::DataType visit(const luci::CircleConst *node) final { return node->dtype(); }
loco::DataType visit(const luci::CircleConv2D *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleCos *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleCos *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleCustom *node) final
{
if (node->custom_code() == "BatchMatMulV2")
{
- return loco::dtype_get(node->inputs(0));
+ return luci::dtype_get(node->inputs(0));
}
return node->dtype();
}
loco::DataType visit(const luci::CircleDepthToSpace *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleDepthwiseConv2D *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleDequantize *) final { return loco::DataType::FLOAT32; }
- loco::DataType visit(const luci::CircleDiv *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleDiv *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleElu *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleEqual *) final { return loco::DataType::BOOL; }
- loco::DataType visit(const luci::CircleExp *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleExp *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleExpandDims *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
+ }
+
+ loco::DataType visit(const luci::CircleFakeQuant *node) final
+ {
+ return luci::dtype_get(node->inputs());
}
loco::DataType visit(const luci::CircleFill *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
- loco::DataType visit(const luci::CircleFloor *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleFloor *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleFloorDiv *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleFloorMod *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleFullyConnected *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleGather *node) final
{
- return loco::dtype_get(node->params());
+ return luci::dtype_get(node->params());
}
loco::DataType visit(const luci::CircleGatherNd *node) final
{
- return loco::dtype_get(node->params());
+ return luci::dtype_get(node->params());
}
loco::DataType visit(const luci::CircleGreater *) final { return loco::DataType::BOOL; }
{
// Type of If is not used. Just use input 0
assert(node->input_count() > 0);
- return loco::dtype_get(node->input(0));
+ return luci::dtype_get(node->input(0));
}
loco::DataType visit(const luci::CircleL2Normalize *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleL2Pool2D *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
loco::DataType visit(const luci::CircleLeakyRelu *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleLess *) final { return loco::DataType::BOOL; }
loco::DataType visit(const luci::CircleLocalResponseNormalization *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleLog *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleLog *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleLogicalAnd *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleLogicalNot *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleLogicalOr *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleLogistic *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleLogSoftmax *node) final
{
- return loco::dtype_get(node->logits());
+ return luci::dtype_get(node->logits());
}
loco::DataType visit(const luci::CircleMatrixDiag *node) final
{
- return loco::dtype_get(node->diagonal());
+ return luci::dtype_get(node->diagonal());
}
loco::DataType visit(const luci::CircleMatrixSetDiag *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleMaximum *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleMaximum *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleMaxPool2D *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
loco::DataType visit(const luci::CircleMean *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleMinimum *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleMinimum *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleMirrorPad *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleNeg *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleNeg *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleNonMaxSuppressionV4 *node) final
{
- return loco::dtype_get(node->boxes());
+ return luci::dtype_get(node->boxes());
}
loco::DataType visit(const luci::CircleNonMaxSuppressionV5 *node) final
{
- return loco::dtype_get(node->boxes());
+ return luci::dtype_get(node->boxes());
}
loco::DataType visit(const luci::CircleNotEqual *) final { return loco::DataType::BOOL; }
// Only support CirclePack with one or more inputs
assert(node->values_count() > 0);
- auto first_value_type = loco::dtype_get(node->values(0));
+ auto first_value_type = luci::dtype_get(node->values(0));
for (uint32_t i = 1; i < node->values_count(); ++i)
- assert(first_value_type == loco::dtype_get(node->values(i)));
+ assert(first_value_type == luci::dtype_get(node->values(i)));
return first_value_type;
}
- loco::DataType visit(const luci::CirclePad *node) final { return loco::dtype_get(node->input()); }
+ loco::DataType visit(const luci::CirclePad *node) final { return luci::dtype_get(node->input()); }
loco::DataType visit(const luci::CirclePadV2 *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CirclePow *node) final
{
// TODO make sure types cannot differ
- auto x_type = loco::dtype_get(node->x());
- auto y_type = loco::dtype_get(node->y());
+ auto x_type = luci::dtype_get(node->x());
+ auto y_type = luci::dtype_get(node->y());
if (x_type != y_type)
INTERNAL_EXN("Different datatype for x and y are not supported");
loco::DataType visit(const luci::CirclePRelu *node) final
{
- auto input_type = loco::dtype_get(node->input());
- auto alpha_type = loco::dtype_get(node->alpha());
+ auto input_type = luci::dtype_get(node->input());
+ auto alpha_type = luci::dtype_get(node->alpha());
if (input_type != alpha_type)
INTERNAL_EXN("Different datatype for input and alpha are not supported");
loco::DataType visit(const luci::CircleRange *node) final
{
- return loco::dtype_get(node->start());
+ return luci::dtype_get(node->start());
}
loco::DataType visit(const luci::CircleRank *) final { return loco::DataType::S32; }
- loco::DataType visit(const luci::CircleMul *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleMul *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleOneHot *node) final
{
- return loco::dtype_get(node->on_value());
+ return luci::dtype_get(node->on_value());
}
loco::DataType visit(const luci::CircleReduceAny *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReduceMax *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReduceMin *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReduceProd *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleRelu *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleRelu6 *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleReluN1To1 *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleReshape *node) final
{
- return loco::dtype_get(node->tensor());
+ return luci::dtype_get(node->tensor());
}
loco::DataType visit(const luci::CircleResizeBilinear *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleResizeNearestNeighbor *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReverseSequence *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReverseV2 *node) final
{
- return loco::dtype_get(node->tensor());
+ return luci::dtype_get(node->tensor());
}
- loco::DataType visit(const luci::CircleRound *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleRound *node) final { return luci::dtype_get(node->x()); }
- loco::DataType visit(const luci::CircleRsqrt *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleRsqrt *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleScatterNd *node) final
{
- return loco::dtype_get(node->updates());
+ return luci::dtype_get(node->updates());
}
loco::DataType visit(const luci::CircleSegmentSum *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSelect *node) final
{
- assert(loco::dtype_get(node->t()) == loco::dtype_get(node->e()));
- return loco::dtype_get(node->t());
+ assert(luci::dtype_get(node->t()) == luci::dtype_get(node->e()));
+ return luci::dtype_get(node->t());
}
loco::DataType visit(const luci::CircleSelectV2 *node) final
{
- assert(loco::dtype_get(node->t()) == loco::dtype_get(node->e()));
- return loco::dtype_get(node->t());
+ assert(luci::dtype_get(node->t()) == luci::dtype_get(node->e()));
+ return luci::dtype_get(node->t());
}
loco::DataType visit(const luci::CircleShape *node) final { return node->out_type(); }
- loco::DataType visit(const luci::CircleSin *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleSin *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleSlice *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSoftmax *node) final
{
- return loco::dtype_get(node->logits());
+ return luci::dtype_get(node->logits());
}
loco::DataType visit(const luci::CircleSpaceToBatchND *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSpaceToDepth *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSparseToDense *node) final
{
- return loco::dtype_get(node->values());
+ return luci::dtype_get(node->values());
}
loco::DataType visit(const luci::CircleSplit *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSplitV *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleSqrt *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleSqrt *node) final { return luci::dtype_get(node->x()); }
- loco::DataType visit(const luci::CircleSquare *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleSquare *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleSquaredDifference *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleSqueeze *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleStridedSlice *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleSub *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleSub *node) final { return luci::dtype_get(node->x()); }
- loco::DataType visit(const luci::CircleSum *node) final { return loco::dtype_get(node->input()); }
+ loco::DataType visit(const luci::CircleSum *node) final { return luci::dtype_get(node->input()); }
- loco::DataType visit(const luci::CircleTanh *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleTanh *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleTile *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleTopKV2 *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleTranspose *node) final
{
- return loco::dtype_get(node->a());
+ return luci::dtype_get(node->a());
}
loco::DataType visit(const luci::CircleTransposeConv *node) final
{
- return loco::dtype_get(node->outBackprop());
+ return luci::dtype_get(node->outBackprop());
}
loco::DataType visit(const luci::CircleUnidirectionalSequenceLSTM *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleUnique *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleUnpack *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
loco::DataType visit(const luci::CircleWhere *) final { return loco::DataType::S64; }
{
// Type of While is not used. Just use input 0
assert(node->input_count() > 0);
- return loco::dtype_get(node->input(0));
+ return luci::dtype_get(node->input(0));
}
loco::DataType visit(const luci::CircleZerosLike *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
// Circle Only
loco::DataType visit(const luci::CircleInstanceNorm *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
// Virtual
{
// We don't care for the type if from() is CircleOutputDummy or CircleOutputExclude
// from() type should match that of CircleOutput
- assert(output_dtype == loco::dtype_get(node->from()));
+ assert(output_dtype == luci::dtype_get(node->from()));
}
return output_dtype;
}
loco::DataType visit(const luci::CircleCustomOut *node) final { return node->dtype(); }
- loco::DataType visit(const luci::CircleIfOut *node) final
- {
- /**
- * @note IF operator type and shape are that of the "then" and "else"
- * Graph Outputs.
- */
- auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
- if (circle_if == nullptr)
- {
- INTERNAL_EXN("CircleIf IR is not configured correctly");
- }
-
- auto index = node->index();
- auto then_graph = circle_if->then_graph();
- auto else_graph = circle_if->else_graph();
- assert(then_graph != nullptr);
- assert(else_graph != nullptr);
-
- // shape and type are assumed to be same
- // these are checked at post_import_graph() in Import
- auto then_outputs = loco::output_nodes(then_graph);
- auto else_outputs = loco::output_nodes(else_graph);
- assert(then_outputs.size() == else_outputs.size());
- assert(index < static_cast<int32_t>(then_outputs.size()));
-
- auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
- auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
-
- auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
- auto else_graph_outputs = else_graph->outputs();
- assert(then_graph_outputs->size() == else_graph_outputs->size());
-
- auto then_graph_output = then_graph_outputs->at(then_out->index());
- auto else_graph_output = else_graph_outputs->at(else_out->index());
- (void)else_graph_output; // make compiler happy for unused variable warnings
- assert(then_graph_output->dtype() == else_graph_output->dtype());
-
- return then_graph_output->dtype();
- }
-
loco::DataType visit(const luci::CircleNonMaxSuppressionV4Out *node) final
{
(void)node;
loco::DataType visit(const luci::CircleSplitOut *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSplitVOut *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleTopKV2Out *node) final
{
// First output is same as input
if (node->index() == 0)
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
    // Second output is always S32
assert(node->index() == 1);
return loco::DataType::S32;
{
if (node->index() == 0)
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
assert(node->index() == 1);
auto unique = loco::must_cast<luci::CircleUnique *>(node->input());
loco::DataType visit(const luci::CircleUnpackOut *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleWhileOut *node) final
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TestGraph.h"
-#include <luci/Service/CircleTypeInferenceRule.h>
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleDialect.h>
-
-#include <loco.h>
-#include <loco/IR/CanonicalDialect.h>
-#include <loco/Service/TypeInference.h>
-
-#include <gtest/gtest.h>
-
-#include <memory>
-
-TEST(CircleTypeInferenceRuleTest, minimal_with_CircleRelu)
-{
- // Create a simple network
- luci::test::TestGraph graph;
- auto relu_node = graph.append<luci::CircleRelu>(graph.input_node);
- graph.complete(relu_node);
-
- // set dtype for nodes; like setting them in import
- graph.input_node->dtype(loco::DataType::S32);
- relu_node->dtype(loco::DataType::S32);
- graph.output_node->dtype(loco::DataType::S32);
-
- luci::test::graph_input_dtype(graph.input_node);
- luci::test::graph_output_dtype(graph.output_node);
-
- // pre-check
- ASSERT_FALSE(loco::dtype_known(relu_node));
-
- // type inference
- luci::CircleTypeInferenceRule circle_rule;
- loco::CanonicalTypeInferenceRule canon_rule;
- loco::MultiDialectTypeInferenceRule rules;
-
- rules.bind(loco::CanonicalDialect::get(), &canon_rule);
- rules.bind(luci::CircleDialect::get(), &circle_rule);
-
- loco::apply(&rules).to(graph.g.get());
-
- // Verify
- ASSERT_TRUE(loco::dtype_known(relu_node));
- auto type = loco::dtype_get(relu_node);
- ASSERT_EQ(loco::DataType::S32, type);
-}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleAbs *)
+{
+ return _graph->nodes()->create<luci::CircleAbs>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Abs)
+{
+ auto g = loco::make_graph();
+ auto node_abs = g->nodes()->create<luci::CircleAbs>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_abs, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_abs = dynamic_cast<luci::CircleAbs *>(cloned);
+ ASSERT_NE(nullptr, cloned_abs);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleAdd *node)
+{
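+  // A node whose fused activation function is left UNDEFINED is not cloned (nullptr is returned).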
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleAdd>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+/**
+ * @note Function to test: Shape inference of two different input shapes
+ *
+ * Rank expansion to higher input side
+ * x(2,1,5) + y(3,5) --> x(2,1,5) + y(1,3,5)
+ * Do output shape inference like numpy
+ * x(2,1,5) + y(1,3,5) --> output(2,3,5)
+ * For each axis, dim value should be same OR one of them should be 1
+ */
+TEST(ShapeRuleTest, different_input_shapes_add)
+{
+ luci::CircleInput input1;
+ luci::CircleInput input2;
+ luci::CircleAdd add;
+
+ input1.shape({2, 1, 5});
+ input1.shape_status(luci::ShapeStatus::VALID);
+ input2.shape({3, 5});
+ input2.shape_status(luci::ShapeStatus::VALID);
+
+ add.x(&input1);
+ add.y(&input2);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&add, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(2, shape.dim(0).value());
+ ASSERT_EQ(3, shape.dim(1).value());
+ ASSERT_EQ(5, shape.dim(2).value());
+}
+
+TEST(CloneNodeTest, clone_Add)
+{
+ auto g = loco::make_graph();
+ auto node_add = g->nodes()->create<luci::CircleAdd>();
+ node_add->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_add, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_add = dynamic_cast<luci::CircleAdd *>(cloned);
+ ASSERT_NE(nullptr, cloned_add);
+ ASSERT_EQ(node_add->fusedActivationFunction(), cloned_add->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_Add_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_add = g->nodes()->create<luci::CircleAdd>();
+ node_add->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_add, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleAddN *node)
+{
+ auto arity = node->arity();
+ return _graph->nodes()->create<luci::CircleAddN>(arity);
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_AddN)
+{
+ auto g = loco::make_graph();
+ auto node_addn = g->nodes()->create<luci::CircleAddN>(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_addn, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_addn = dynamic_cast<luci::CircleAddN *>(cloned);
+ ASSERT_NE(nullptr, cloned_addn);
+ ASSERT_EQ(node_addn->arity(), cloned_addn->arity());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleArgMax *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleArgMax>();
+ if (cloned != nullptr)
+ cloned->output_type(node->output_type());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ArgMax)
+{
+ auto g = loco::make_graph();
+ auto node_argmax = g->nodes()->create<luci::CircleArgMax>();
+ node_argmax->output_type(loco::DataType::FLOAT32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_argmax, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_argmax = dynamic_cast<luci::CircleArgMax *>(cloned);
+ ASSERT_NE(nullptr, cloned_argmax);
+ ASSERT_EQ(node_argmax->output_type(), cloned_argmax->output_type());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleArgMin *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleArgMin>();
+ if (cloned != nullptr)
+ cloned->output_type(node->output_type());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ArgMin)
+{
+ auto g = loco::make_graph();
+ auto node_argmin = g->nodes()->create<luci::CircleArgMin>();
+ node_argmin->output_type(loco::DataType::FLOAT32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_argmin, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_argmin = dynamic_cast<luci::CircleArgMin *>(cloned);
+ ASSERT_NE(nullptr, cloned_argmin);
+ ASSERT_EQ(node_argmin->output_type(), cloned_argmin->output_type());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleAveragePool2D *node)
+{
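+  // A node with an UNDEFINED fused activation function or UNDEFINED padding is not cloned.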
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleAveragePool2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->filter()->h(node->filter()->h());
+ cloned->filter()->w(node->filter()->w());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, simple_valid_pad_avgpool2d)
+{
+ luci::CircleInput input;
+ luci::CircleAveragePool2D avgpool_2d;
+
+ input.shape({1, 4, 3, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ avgpool_2d.value(&input);
+ avgpool_2d.filter()->h(2);
+ avgpool_2d.filter()->w(2);
+ avgpool_2d.stride()->h(2);
+ avgpool_2d.stride()->w(2);
+ avgpool_2d.fusedActivationFunction(luci::FusedActFunc::NONE);
+ avgpool_2d.padding(luci::Padding::VALID);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
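+  // Expected shape with VALID padding: out = floor((in - filter) / stride) + 1
+  //   H: floor((4 - 2) / 2) + 1 = 2, W: floor((3 - 2) / 2) + 1 = 1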
+ ASSERT_TRUE(shape_inf_rule.infer(&avgpool_2d, shape));
+ ASSERT_EQ(4, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(2, shape.dim(1).value());
+ ASSERT_EQ(1, shape.dim(2).value());
+ ASSERT_EQ(1, shape.dim(3).value());
+}
+
+TEST(ShapeRuleTest, simple_same_pad_avgpool2d)
+{
+ luci::CircleInput input;
+ luci::CircleAveragePool2D avgpool_2d;
+
+ input.shape({1, 4, 3, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ avgpool_2d.value(&input);
+ avgpool_2d.filter()->h(2);
+ avgpool_2d.filter()->w(2);
+ avgpool_2d.stride()->h(2);
+ avgpool_2d.stride()->w(2);
+ avgpool_2d.fusedActivationFunction(luci::FusedActFunc::NONE);
+ avgpool_2d.padding(luci::Padding::SAME);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
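+  // Expected shape with SAME padding: out = ceil(in / stride)
+  //   H: ceil(4 / 2) = 2, W: ceil(3 / 2) = 2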
+ ASSERT_TRUE(shape_inf_rule.infer(&avgpool_2d, shape));
+ ASSERT_EQ(4, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(2, shape.dim(1).value());
+ ASSERT_EQ(2, shape.dim(2).value());
+ ASSERT_EQ(1, shape.dim(3).value());
+}
+
+TEST(CloneNodeTest, clone_AveragePool2D)
+{
+ auto g = loco::make_graph();
+ auto node_avgpool2d = g->nodes()->create<luci::CircleAveragePool2D>();
+ node_avgpool2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_avgpool2d->padding(luci::Padding::SAME);
+ node_avgpool2d->filter()->h(1);
+ node_avgpool2d->filter()->w(2);
+ node_avgpool2d->stride()->h(3);
+ node_avgpool2d->stride()->w(4);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_avgpool2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_avgpool2d = dynamic_cast<luci::CircleAveragePool2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_avgpool2d);
+ ASSERT_EQ(node_avgpool2d->fusedActivationFunction(), cloned_avgpool2d->fusedActivationFunction());
+ ASSERT_EQ(node_avgpool2d->padding(), cloned_avgpool2d->padding());
+ ASSERT_EQ(node_avgpool2d->filter()->h(), cloned_avgpool2d->filter()->h());
+ ASSERT_EQ(node_avgpool2d->filter()->w(), cloned_avgpool2d->filter()->w());
+ ASSERT_EQ(node_avgpool2d->stride()->h(), cloned_avgpool2d->stride()->h());
+ ASSERT_EQ(node_avgpool2d->stride()->w(), cloned_avgpool2d->stride()->w());
+}
+
+TEST(CloneNodeTest, clone_AveragePool2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_avgpool2d = g->nodes()->create<luci::CircleAveragePool2D>();
+ node_avgpool2d->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_avgpool2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_avgpool2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_AveragePool2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_avgpool2d = g->nodes()->create<luci::CircleAveragePool2D>();
+ node_avgpool2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_avgpool2d->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_avgpool2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleBCQFullyConnected *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleBCQFullyConnected>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->weights_hidden_size(node->weights_hidden_size());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_BCQFullyConnected)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_fc->weights_hidden_size(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_fc = dynamic_cast<luci::CircleBCQFullyConnected *>(cloned);
+ ASSERT_NE(nullptr, cloned_fc);
+ ASSERT_EQ(node_fc->fusedActivationFunction(), cloned_fc->fusedActivationFunction());
+ ASSERT_EQ(node_fc->weights_hidden_size(), cloned_fc->weights_hidden_size());
+}
+
+TEST(CloneNodeTest, clone_BCQFullyConnected_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleBCQGather *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleBCQGather>();
+ if (cloned != nullptr)
+ {
+ cloned->axis(node->axis());
+ cloned->input_hidden_size(node->input_hidden_size());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_BCQGather)
+{
+ auto g = loco::make_graph();
+ auto node_gat = g->nodes()->create<luci::CircleBCQGather>();
+ node_gat->axis(3);
+ node_gat->input_hidden_size(5);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_gat, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_gat = dynamic_cast<luci::CircleBCQGather *>(cloned);
+ ASSERT_NE(nullptr, cloned_gat);
+ ASSERT_EQ(node_gat->axis(), cloned_gat->axis());
+ ASSERT_EQ(node_gat->input_hidden_size(), cloned_gat->input_hidden_size());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleBatchMatMul *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleBatchMatMul>();
+ if (cloned != nullptr)
+ {
+ cloned->adj_x(node->adj_x());
+ cloned->adj_y(node->adj_y());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_BatchMatMul)
+{
+ auto g = loco::make_graph();
+ auto node_bmm = g->nodes()->create<luci::CircleBatchMatMul>();
+ node_bmm->adj_x(true);
+ node_bmm->adj_y(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_bmm, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_bmm = dynamic_cast<luci::CircleBatchMatMul *>(cloned);
+ ASSERT_NE(nullptr, cloned_bmm);
+ ASSERT_EQ(node_bmm->adj_x(), cloned_bmm->adj_x());
+ ASSERT_EQ(node_bmm->adj_y(), cloned_bmm->adj_y());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleBatchToSpaceND *)
+{
+ return _graph->nodes()->create<luci::CircleBatchToSpaceND>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_BatchToSpaceND)
+{
+ auto g = loco::make_graph();
+ auto node_b2s = g->nodes()->create<luci::CircleBatchToSpaceND>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_b2s, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_b2s = dynamic_cast<luci::CircleBatchToSpaceND *>(cloned);
+ ASSERT_NE(nullptr, cloned_b2s);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleCast *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleCast>();
+ if (cloned != nullptr)
+ {
+ cloned->in_data_type(node->in_data_type());
+ cloned->out_data_type(node->out_data_type());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Cast)
+{
+ auto g = loco::make_graph();
+ auto node_cast = g->nodes()->create<luci::CircleCast>();
+ node_cast->in_data_type(loco::DataType::U16);
+ node_cast->out_data_type(loco::DataType::S32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_cast, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_cast = dynamic_cast<luci::CircleCast *>(cloned);
+ ASSERT_NE(nullptr, cloned_cast);
+ ASSERT_EQ(node_cast->in_data_type(), cloned_cast->in_data_type());
+ ASSERT_EQ(node_cast->out_data_type(), cloned_cast->out_data_type());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleCeil *)
+{
+ return _graph->nodes()->create<luci::CircleCeil>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Ceil)
+{
+ auto g = loco::make_graph();
+ auto node_ceil = g->nodes()->create<luci::CircleCeil>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ceil, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ceil = dynamic_cast<luci::CircleCeil *>(cloned);
+ ASSERT_NE(nullptr, cloned_ceil);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleConcatenation *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleConcatenation>(node->numValues());
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->axis(node->axis());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Concatenation)
+{
+ auto g = loco::make_graph();
+ auto node_concat = g->nodes()->create<luci::CircleConcatenation>(3);
+ node_concat->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_concat->axis(7);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_concat, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_concat = dynamic_cast<luci::CircleConcatenation *>(cloned);
+ ASSERT_NE(nullptr, cloned_concat);
+ ASSERT_EQ(node_concat->numValues(), cloned_concat->numValues());
+ ASSERT_EQ(node_concat->fusedActivationFunction(), cloned_concat->fusedActivationFunction());
+ ASSERT_EQ(node_concat->axis(), cloned_concat->axis());
+}
+
+TEST(CloneNodeTest, clone_Concatenation_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_concat = g->nodes()->create<luci::CircleConcatenation>(3);
+ node_concat->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_concat, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+#include <loco.h>
+#include <loco/IR/Graph.h>
+
+#include <oops/UserExn.h>
+
+#include <cassert>
+
+namespace
+{
+
+template <loco::DataType T>
+void copy_values(const luci::CircleConst *node, luci::CircleConst *cloned)
+{
+ assert(T == node->dtype());
+ assert(T == cloned->dtype());
+
+ const auto size = node->size<T>();
+ cloned->size<T>(size);
+ for (uint32_t i = 0; i < size; i++)
+ cloned->at<T>(i) = node->at<T>(i);
+}
+
+luci::CircleConst *clone_circleconst(const luci::CircleConst *node, loco::Graph *graph)
+{
+ auto cloned = graph->nodes()->create<luci::CircleConst>();
+
+ if (cloned != nullptr)
+ {
+ // dtype/shape
+ cloned->dtype(node->dtype());
+ cloned->rank(node->rank());
+
+ // values
+ switch (node->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ copy_values<loco::DataType::FLOAT32>(node, cloned);
+ break;
+
+ case loco::DataType::U8:
+ copy_values<loco::DataType::U8>(node, cloned);
+ break;
+
+ case loco::DataType::S8:
+ copy_values<loco::DataType::S8>(node, cloned);
+ break;
+
+ case loco::DataType::S16:
+ copy_values<loco::DataType::S16>(node, cloned);
+ break;
+
+ case loco::DataType::S32:
+ copy_values<loco::DataType::S32>(node, cloned);
+ break;
+
+ case loco::DataType::S64:
+ copy_values<loco::DataType::S64>(node, cloned);
+ break;
+
+ case loco::DataType::BOOL:
+ copy_values<loco::DataType::BOOL>(node, cloned);
+ break;
+
+ default:
+ throw oops::UserExn("Unsupported tensor dtype");
+ }
+ }
+
+ return cloned;
+}
+
+} // namespace
+
+namespace luci
+{
+
+luci::CircleConst *clone(luci::CircleConst *node)
+{
+ auto *cloned = clone_circleconst(node, node->graph());
+
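+  // clone_circleconst copies dtype, rank and element values; the remaining common
+  // attributes (shape dims, quantparam, sparsityparam, ...) are expected to be
+  // filled in by copy_common_attributes below.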
+ copy_common_attributes(node, cloned);
+
+ return cloned;
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleConst *node)
+{
+ return clone_circleconst(node, _graph);
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/Nodes/CircleConst.h"
+#include "luci/Service/CircleNodeClone.h"
+
+#include <loco.h>
+#include <loco/IR/Graph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+luci::CircleConst *new_const_s32(loco::Graph *g)
+{
+ // prepare source CircleConst
+ auto circle_const = g->nodes()->create<luci::CircleConst>();
+
+ const auto size = 2;
+
+ circle_const->dtype(loco::DataType::S32);
+ circle_const->rank(1);
+ circle_const->dim(0).set(size);
+ circle_const->shape_status(luci::ShapeStatus::VALID);
+
+ circle_const->size<loco::DataType::S32>(size);
+ for (uint32_t i = 0; i < size; i++)
+ circle_const->at<loco::DataType::S32>(i) = i;
+
+ // quantparam
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ quantparam->scale = {1.0};
+ quantparam->zerop = {0};
+ quantparam->min = {-127.0};
+ quantparam->max = {127.0};
+ quantparam->quantized_dimension = 1;
+ circle_const->quantparam(std::move(quantparam));
+
+ // sparsityparam
+ auto sparam = std::make_unique<luci::SparsityParam>();
+ sparam->traversal_order = {1};
+ sparam->block_map = {1};
+ sparam->dim_metadata = {};
+ circle_const->sparsityparam(std::move(sparam));
+
+ return circle_const;
+}
+
+template <loco::DataType DT> luci::CircleConst *new_empty_const(loco::Graph *g)
+{
+ auto circle_const = g->nodes()->create<luci::CircleConst>();
+
+ const auto size = 0;
+
+ circle_const->dtype(DT);
+ circle_const->rank(1);
+ circle_const->dim(0).set(size);
+ circle_const->shape_status(luci::ShapeStatus::VALID);
+ circle_const->size<DT>(size);
+
+ return circle_const;
+}
+
+} // namespace
+
+TEST(CircleConstTest, clone)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_const_s32(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::S32, const_cloned->dtype());
+ ASSERT_EQ(1, const_cloned->rank());
+ ASSERT_EQ(2, const_cloned->dim(0).value());
+ ASSERT_EQ(2, const_cloned->size<loco::DataType::S32>());
+ ASSERT_EQ(0, const_cloned->at<loco::DataType::S32>(0));
+ ASSERT_EQ(1, const_cloned->at<loco::DataType::S32>(1));
+ ASSERT_NE(nullptr, const_cloned->quantparam());
+ ASSERT_NE(nullptr, const_cloned->sparsityparam());
+}
+
+TEST(CircleConstTest, clone_U8)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_empty_const<loco::DataType::U8>(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::U8, const_cloned->dtype());
+}
+
+TEST(CircleConstTest, clone_S8)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_empty_const<loco::DataType::S8>(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::S8, const_cloned->dtype());
+}
+
+TEST(CircleConstTest, clone_S64)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_empty_const<loco::DataType::S64>(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::S64, const_cloned->dtype());
+}
+
+TEST(CircleConstTest, clone_BOOL)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_empty_const<loco::DataType::BOOL>(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::BOOL, const_cloned->dtype());
+}
+
+TEST(CloneNodeTest, clone_Const)
+{
+ auto g = loco::make_graph();
+ auto node_const = new_const_s32(g.get());
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_const, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_const = dynamic_cast<luci::CircleConst *>(cloned);
+ ASSERT_NE(nullptr, cloned_const);
+ ASSERT_EQ(loco::DataType::S32, cloned_const->dtype());
+ ASSERT_EQ(1, cloned_const->rank());
+ ASSERT_EQ(2, cloned_const->dim(0).value());
+ ASSERT_EQ(2, cloned_const->size<loco::DataType::S32>());
+ ASSERT_EQ(0, cloned_const->at<loco::DataType::S32>(0));
+ ASSERT_EQ(1, cloned_const->at<loco::DataType::S32>(1));
+ ASSERT_NE(nullptr, cloned_const->quantparam());
+ ASSERT_NE(nullptr, cloned_const->sparsityparam());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleConv2D *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleConv2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ cloned->dilation()->h(node->dilation()->h());
+ cloned->dilation()->w(node->dilation()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Conv2D)
+{
+ auto g = loco::make_graph();
+ auto node_conv2d = g->nodes()->create<luci::CircleConv2D>();
+ node_conv2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_conv2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_conv2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_conv2d = dynamic_cast<luci::CircleConv2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_conv2d);
+ ASSERT_EQ(node_conv2d->fusedActivationFunction(), cloned_conv2d->fusedActivationFunction());
+ ASSERT_EQ(node_conv2d->padding(), cloned_conv2d->padding());
+}
+
+TEST(CloneNodeTest, clone_Conv2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_conv2d = g->nodes()->create<luci::CircleConv2D>();
+ node_conv2d->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_conv2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_conv2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_Conv2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_conv2d = g->nodes()->create<luci::CircleConv2D>();
+ node_conv2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_conv2d->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_conv2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleCos *)
+{
+ return _graph->nodes()->create<luci::CircleCos>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Cos)
+{
+ auto g = loco::make_graph();
+ auto node_cos = g->nodes()->create<luci::CircleCos>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_cos, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_cos = dynamic_cast<luci::CircleCos *>(cloned);
+ ASSERT_NE(nullptr, cloned_cos);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleCustom *node)
+{
+ uint32_t num_in = node->numInputs();
+ uint32_t num_out = node->numOutputs();
+ auto *cloned = _graph->nodes()->create<luci::CircleCustom>(num_in, num_out);
+ if (cloned != nullptr)
+ {
+ cloned->custom_options(node->custom_options());
+ cloned->custom_code(node->custom_code());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <vector>
+
+TEST(CloneNodeTest, clone_Custom)
+{
+ auto g = loco::make_graph();
+ auto node_custom = g->nodes()->create<luci::CircleCustom>(2, 3);
+ std::vector<uint8_t> options({0x55, 0x56, 0x57});
+ std::string code = "hello";
+ node_custom->custom_options(options);
+ node_custom->custom_code(code);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_custom, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_custom = dynamic_cast<luci::CircleCustom *>(cloned);
+ ASSERT_NE(nullptr, cloned_custom);
+ auto cloned_options = cloned_custom->custom_options();
+ ASSERT_EQ(options.size(), cloned_options.size());
+ auto size = options.size();
+ for (size_t s = 0; s < size; ++s)
+ ASSERT_EQ(options.at(s), cloned_options.at(s));
+ ASSERT_TRUE(node_custom->custom_code() == cloned_custom->custom_code());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleCustomOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleCustomOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_CustomOut)
+{
+ auto g = loco::make_graph();
+ auto node_cout = g->nodes()->create<luci::CircleCustomOut>();
+ node_cout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_cout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_cout = dynamic_cast<luci::CircleCustomOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_cout);
+ ASSERT_EQ(node_cout->index(), cloned_cout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleDepthToSpace *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleDepthToSpace>();
+ if (cloned != nullptr)
+ cloned->block_size(node->block_size());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_DepthToSpace)
+{
+ auto g = loco::make_graph();
+ auto node_d2s = g->nodes()->create<luci::CircleDepthToSpace>();
+ node_d2s->block_size(32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_d2s, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_d2s = dynamic_cast<luci::CircleDepthToSpace *>(cloned);
+ ASSERT_NE(nullptr, cloned_d2s);
+ ASSERT_EQ(node_d2s->block_size(), cloned_d2s->block_size());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleDepthwiseConv2D *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleDepthwiseConv2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ cloned->depthMultiplier(node->depthMultiplier());
+ cloned->dilation()->h(node->dilation()->h());
+ cloned->dilation()->w(node->dilation()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_DepthwiseConv2D)
+{
+ auto g = loco::make_graph();
+ auto node_dwconv2d = g->nodes()->create<luci::CircleDepthwiseConv2D>();
+ node_dwconv2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_dwconv2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dwconv2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_dwconv2d = dynamic_cast<luci::CircleDepthwiseConv2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_dwconv2d);
+ ASSERT_EQ(node_dwconv2d->fusedActivationFunction(), cloned_dwconv2d->fusedActivationFunction());
+ ASSERT_EQ(node_dwconv2d->padding(), cloned_dwconv2d->padding());
+}
+
+TEST(CloneNodeTest, clone_DepthwiseConv2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_dwconv2d = g->nodes()->create<luci::CircleDepthwiseConv2D>();
+ node_dwconv2d->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_dwconv2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dwconv2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_DepthwiseConv2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_dwconv2d = g->nodes()->create<luci::CircleDepthwiseConv2D>();
+ node_dwconv2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_dwconv2d->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dwconv2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleDequantize *)
+{
+ return _graph->nodes()->create<luci::CircleDequantize>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Dequantize)
+{
+ auto g = loco::make_graph();
+ auto node_dq = g->nodes()->create<luci::CircleDequantize>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dq, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_dq = dynamic_cast<luci::CircleDequantize *>(cloned);
+ ASSERT_NE(nullptr, cloned_dq);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleDiv *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleDiv>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Div)
+{
+ auto g = loco::make_graph();
+ auto node_div = g->nodes()->create<luci::CircleDiv>();
+ node_div->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_div, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_div = dynamic_cast<luci::CircleDiv *>(cloned);
+ ASSERT_NE(nullptr, cloned_div);
+ ASSERT_EQ(node_div->fusedActivationFunction(), cloned_div->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_Div_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_div = g->nodes()->create<luci::CircleDiv>();
+ node_div->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_div, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleElu *)
+{
+ return _graph->nodes()->create<luci::CircleElu>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Elu)
+{
+ auto g = loco::make_graph();
+ auto node_elu = g->nodes()->create<luci::CircleElu>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_elu, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_elu = dynamic_cast<luci::CircleElu *>(cloned);
+ ASSERT_NE(nullptr, cloned_elu);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleEqual *)
+{
+ return _graph->nodes()->create<luci::CircleEqual>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Equal)
+{
+ auto g = loco::make_graph();
+ auto node_eq = g->nodes()->create<luci::CircleEqual>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_eq, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_eq = dynamic_cast<luci::CircleEqual *>(cloned);
+ ASSERT_NE(nullptr, cloned_eq);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleExp *)
+{
+ return _graph->nodes()->create<luci::CircleExp>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Exp)
+{
+ auto g = loco::make_graph();
+ auto node_exp = g->nodes()->create<luci::CircleExp>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_exp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_exp = dynamic_cast<luci::CircleExp *>(cloned);
+ ASSERT_NE(nullptr, cloned_exp);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleExpandDims *)
+{
+ return _graph->nodes()->create<luci::CircleExpandDims>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, simple_expand_dims)
+{
+ luci::CircleInput input;
+ luci::CircleConst axis;
+ luci::CircleExpandDims expand_dims;
+
+ input.shape({4, 3});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ axis.dtype(loco::DataType::S32);
+ axis.rank(0);
+ axis.size<loco::DataType::S32>(1);
+ axis.at<loco::DataType::S32>(0) = 1;
+ axis.shape_status(luci::ShapeStatus::VALID);
+
+ expand_dims.input(&input);
+ expand_dims.axis(&axis);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&expand_dims, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(4, shape.dim(0).value());
+ ASSERT_EQ(1, shape.dim(1).value());
+ ASSERT_EQ(3, shape.dim(2).value());
+}
+
+TEST(CloneNodeTest, clone_ExpandDims)
+{
+ auto g = loco::make_graph();
+ auto node_ed = g->nodes()->create<luci::CircleExpandDims>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ed, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ed = dynamic_cast<luci::CircleExpandDims *>(cloned);
+ ASSERT_NE(nullptr, cloned_ed);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleFakeQuant *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleFakeQuant>();
+ if (cloned != nullptr)
+ {
+ cloned->min(node->min());
+ cloned->max(node->max());
+ cloned->num_bits(node->num_bits());
+ cloned->narrow_range(node->narrow_range());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_FakeQuant)
+{
+ auto g = loco::make_graph();
+ auto node_fq = g->nodes()->create<luci::CircleFakeQuant>();
+ node_fq->min(1.0f);
+ node_fq->max(2.0f);
+ node_fq->num_bits(8);
+ node_fq->narrow_range(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fq, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_fq = dynamic_cast<luci::CircleFakeQuant *>(cloned);
+ ASSERT_NE(nullptr, cloned_fq);
+ ASSERT_EQ(node_fq->min(), cloned_fq->min());
+ ASSERT_EQ(node_fq->max(), cloned_fq->max());
+ ASSERT_EQ(node_fq->num_bits(), cloned_fq->num_bits());
+ ASSERT_EQ(node_fq->narrow_range(), cloned_fq->narrow_range());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleFill *)
+{
+ return _graph->nodes()->create<luci::CircleFill>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Fill)
+{
+ auto g = loco::make_graph();
+ auto node_fill = g->nodes()->create<luci::CircleFill>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fill, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_fill = dynamic_cast<luci::CircleFill *>(cloned);
+ ASSERT_NE(nullptr, cloned_fill);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleFloor *)
+{
+ return _graph->nodes()->create<luci::CircleFloor>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Floor)
+{
+ auto g = loco::make_graph();
+ auto node_floor = g->nodes()->create<luci::CircleFloor>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_floor, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_floor = dynamic_cast<luci::CircleFloor *>(cloned);
+ ASSERT_NE(nullptr, cloned_floor);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleFloorDiv *)
+{
+ return _graph->nodes()->create<luci::CircleFloorDiv>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_FloorDiv)
+{
+ auto g = loco::make_graph();
+ auto node_floordiv = g->nodes()->create<luci::CircleFloorDiv>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_floordiv, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_floordiv = dynamic_cast<luci::CircleFloorDiv *>(cloned);
+ ASSERT_NE(nullptr, cloned_floordiv);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleFloorMod *)
+{
+ return _graph->nodes()->create<luci::CircleFloorMod>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_FloorMod)
+{
+ auto g = loco::make_graph();
+ auto node_floormod = g->nodes()->create<luci::CircleFloorMod>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_floormod, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_floormod = dynamic_cast<luci::CircleFloorMod *>(cloned);
+ ASSERT_NE(nullptr, cloned_floormod);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleFullyConnected *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->weights_format() == luci::CircleFullyConnected::WeightsFormat::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleFullyConnected>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->weights_format(node->weights_format());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_FullyConnected)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_fc->weights_format(luci::CircleFullyConnected::WeightsFormat::DEFAULT);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_fc = dynamic_cast<luci::CircleFullyConnected *>(cloned);
+ ASSERT_NE(nullptr, cloned_fc);
+ ASSERT_EQ(node_fc->fusedActivationFunction(), cloned_fc->fusedActivationFunction());
+ ASSERT_EQ(node_fc->weights_format(), cloned_fc->weights_format());
+}
+
+TEST(CloneNodeTest, clone_FullyConnected_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_fc->weights_format(luci::CircleFullyConnected::WeightsFormat::DEFAULT);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_FullyConnected_wf_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_fc->weights_format(luci::CircleFullyConnected::WeightsFormat::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleGather *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleGather>();
+ if (cloned != nullptr)
+ cloned->axis(node->axis());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Gather)
+{
+ auto g = loco::make_graph();
+ auto node_gat = g->nodes()->create<luci::CircleGather>();
+ node_gat->axis(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_gat, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_gat = dynamic_cast<luci::CircleGather *>(cloned);
+ ASSERT_NE(nullptr, cloned_gat);
+ ASSERT_EQ(node_gat->axis(), cloned_gat->axis());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleGatherNd *)
+{
+ return _graph->nodes()->create<luci::CircleGatherNd>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <oops/InternalExn.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, gather_nd_simple)
+{
+ luci::CircleInput input;
+ luci::CircleConst indices_const;
+ luci::CircleGatherNd gather_nd;
+
+ input.shape({1, 4, 4, 3});
+ indices_const.shape({1, 2, 3});
+
+ input.shape_status(luci::ShapeStatus::VALID);
+ indices_const.shape_status(luci::ShapeStatus::VALID);
+
+ gather_nd.params(&input);
+ gather_nd.indices(&indices_const);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&gather_nd, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(2, shape.dim(1).value());
+ ASSERT_EQ(3, shape.dim(2).value());
+}
+
+TEST(ShapeRuleTest, gather_nd_slices)
+{
+ luci::CircleInput input;
+ luci::CircleConst indices_const;
+ luci::CircleGatherNd gather_nd;
+
+ input.shape({1, 4, 4, 3});
+ indices_const.shape({1, 2, 1});
+
+ input.shape_status(luci::ShapeStatus::VALID);
+ indices_const.shape_status(luci::ShapeStatus::VALID);
+
+ gather_nd.params(&input);
+ gather_nd.indices(&indices_const);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&gather_nd, shape));
+ ASSERT_EQ(5, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(2, shape.dim(1).value());
+ ASSERT_EQ(4, shape.dim(2).value());
+ ASSERT_EQ(4, shape.dim(3).value());
+ ASSERT_EQ(3, shape.dim(4).value());
+}
+
+TEST(ShapeRuleTest, gather_nd_NEG)
+{
+ luci::CircleInput input;
+ luci::CircleConst indices_const;
+ luci::CircleGatherNd gather_nd;
+
+ input.shape({1, 4, 4, 3});
+ indices_const.shape({1, 2, 5});
+
+ input.shape_status(luci::ShapeStatus::VALID);
+ indices_const.shape_status(luci::ShapeStatus::VALID);
+
+ gather_nd.params(&input);
+ gather_nd.indices(&indices_const);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_THROW(shape_inf_rule.infer(&gather_nd, shape), oops::InternalExn);
+}
+
+TEST(CloneNodeTest, clone_GatherNd)
+{
+ auto g = loco::make_graph();
+ auto node_gtnd = g->nodes()->create<luci::CircleGatherNd>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_gtnd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_gtnd = dynamic_cast<luci::CircleGatherNd *>(cloned);
+ ASSERT_NE(nullptr, cloned_gtnd);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleGreater *)
+{
+ return _graph->nodes()->create<luci::CircleGreater>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Greater)
+{
+ auto g = loco::make_graph();
+ auto node_gt = g->nodes()->create<luci::CircleGreater>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_gt, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_gt = dynamic_cast<luci::CircleGreater *>(cloned);
+ ASSERT_NE(nullptr, cloned_gt);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleGreaterEqual *)
+{
+ return _graph->nodes()->create<luci::CircleGreaterEqual>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_GreaterEqual)
+{
+ auto g = loco::make_graph();
+ auto node_ge = g->nodes()->create<luci::CircleGreaterEqual>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ge, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ge = dynamic_cast<luci::CircleGreaterEqual *>(cloned);
+ ASSERT_NE(nullptr, cloned_ge);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeInference.h>
+#include <luci/Service/CircleTypeInference.h>
+
+namespace
+{
+
+struct CircleIfOutGraphs
+{
+ loco::GraphOutput *then_graph_output;
+ loco::GraphOutput *else_graph_output;
+};
+
+CircleIfOutGraphs get_out_graphs(const luci::CircleIfOut *node)
+{
+ CircleIfOutGraphs ret_out;
+
+ /**
+ * @note The type and shape of the IF operator are those of the "then" and "else"
+ * graph outputs.
+ */
+ auto circle_if = loco::must_cast<const luci::CircleIf *>(node->input());
+
+ auto index = node->index();
+ auto then_graph = circle_if->then_graph();
+ auto else_graph = circle_if->else_graph();
+ assert(then_graph != nullptr);
+ assert(else_graph != nullptr);
+
+ // shape and type are assumed to be the same
+ // these are checked at post_import_graph() in Import
+ auto then_outputs = loco::output_nodes(then_graph);
+ auto else_outputs = loco::output_nodes(else_graph);
+ assert(then_outputs.size() == else_outputs.size());
+ assert(index < static_cast<int32_t>(then_outputs.size()));
+
+ auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
+ auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
+
+ auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
+ auto else_graph_outputs = else_graph->outputs();
+ assert(then_graph_outputs->size() == else_graph_outputs->size());
+
+ ret_out.then_graph_output = then_graph_outputs->at(then_out->index());
+ ret_out.else_graph_output = else_graph_outputs->at(else_out->index());
+
+ return ret_out;
+}
+
+} // namespace
+
+namespace luci
+{
+
+loco::TensorShape sinf::Algorithm::visit(const luci::CircleIfOut *node)
+{
+ auto graphs = get_out_graphs(node);
+ assert(*graphs.then_graph_output->shape() == *graphs.else_graph_output->shape());
+ return *graphs.then_graph_output->shape();
+}
+
+loco::DataType tinf::Algorithm::visit(const luci::CircleIfOut *node)
+{
+ auto graphs = get_out_graphs(node);
+ assert(graphs.then_graph_output->dtype() == graphs.else_graph_output->dtype());
+ return graphs.then_graph_output->dtype();
+}
+
+} // namespace luci
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <luci/Service/CircleShapeSignatureInference.h>
-
-namespace luci
-{
-
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleInput *node)
-{
- return node->shape_signature();
-}
-
-} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleInstanceNorm *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleInstanceNorm>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->epsilon(node->epsilon());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_InstanceNorm)
+{
+ auto g = loco::make_graph();
+ auto node_im = g->nodes()->create<luci::CircleInstanceNorm>();
+ node_im->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_im->epsilon(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_im, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_im = dynamic_cast<luci::CircleInstanceNorm *>(cloned);
+ ASSERT_NE(nullptr, cloned_im);
+ ASSERT_EQ(node_im->fusedActivationFunction(), cloned_im->fusedActivationFunction());
+ ASSERT_EQ(node_im->epsilon(), cloned_im->epsilon());
+}
+
+TEST(CloneNodeTest, clone_InstanceNorm_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_im = g->nodes()->create<luci::CircleInstanceNorm>();
+ node_im->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_im, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleL2Normalize *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleL2Normalize>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_L2Normalize)
+{
+ auto g = loco::make_graph();
+ auto node_l2n = g->nodes()->create<luci::CircleL2Normalize>();
+ node_l2n->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2n, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_l2n = dynamic_cast<luci::CircleL2Normalize *>(cloned);
+ ASSERT_NE(nullptr, cloned_l2n);
+ ASSERT_EQ(node_l2n->fusedActivationFunction(), cloned_l2n->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_L2Normalize_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_l2n = g->nodes()->create<luci::CircleL2Normalize>();
+ node_l2n->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2n, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleL2Pool2D *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleL2Pool2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->filter()->h(node->filter()->h());
+ cloned->filter()->w(node->filter()->w());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_L2Pool2D)
+{
+ auto g = loco::make_graph();
+ auto node_l2p = g->nodes()->create<luci::CircleL2Pool2D>();
+ node_l2p->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_l2p->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2p, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_l2p = dynamic_cast<luci::CircleL2Pool2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_l2p);
+ ASSERT_EQ(node_l2p->fusedActivationFunction(), cloned_l2p->fusedActivationFunction());
+ ASSERT_EQ(node_l2p->padding(), cloned_l2p->padding());
+}
+
+TEST(CloneNodeTest, clone_L2Pool2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_l2p = g->nodes()->create<luci::CircleL2Pool2D>();
+ node_l2p->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_l2p->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2p, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_L2Pool2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_l2p = g->nodes()->create<luci::CircleL2Pool2D>();
+ node_l2p->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_l2p->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2p, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLeakyRelu *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleLeakyRelu>();
+ if (cloned != nullptr)
+ cloned->alpha(node->alpha());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LeakyRelu)
+{
+ auto g = loco::make_graph();
+ auto node_lr = g->nodes()->create<luci::CircleLeakyRelu>();
+ node_lr->alpha(1.2f);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_lr, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_lr = dynamic_cast<luci::CircleLeakyRelu *>(cloned);
+ ASSERT_NE(nullptr, cloned_lr);
+ ASSERT_EQ(node_lr->alpha(), cloned_lr->alpha());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLess *)
+{
+ return _graph->nodes()->create<luci::CircleLess>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Less)
+{
+ auto g = loco::make_graph();
+ auto node_less = g->nodes()->create<luci::CircleLess>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_less, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_less = dynamic_cast<luci::CircleLess *>(cloned);
+ ASSERT_NE(nullptr, cloned_less);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLessEqual *)
+{
+ return _graph->nodes()->create<luci::CircleLessEqual>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LessEqual)
+{
+ auto g = loco::make_graph();
+ auto node_le = g->nodes()->create<luci::CircleLessEqual>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_le, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_le = dynamic_cast<luci::CircleLessEqual *>(cloned);
+ ASSERT_NE(nullptr, cloned_le);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLocalResponseNormalization *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleLocalResponseNormalization>();
+ if (cloned != nullptr)
+ {
+ cloned->radius(node->radius());
+ cloned->bias(node->bias());
+ cloned->alpha(node->alpha());
+ cloned->beta(node->beta());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LocalResponseNormalization)
+{
+ auto g = loco::make_graph();
+ auto node_lrn = g->nodes()->create<luci::CircleLocalResponseNormalization>();
+ node_lrn->radius(32);
+ node_lrn->bias(1.2f);
+ node_lrn->alpha(3.4f);
+ node_lrn->beta(5.7f);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_lrn, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_lrn = dynamic_cast<luci::CircleLocalResponseNormalization *>(cloned);
+ ASSERT_NE(nullptr, cloned_lrn);
+ ASSERT_EQ(node_lrn->radius(), cloned_lrn->radius());
+ ASSERT_EQ(node_lrn->bias(), cloned_lrn->bias());
+ ASSERT_EQ(node_lrn->alpha(), cloned_lrn->alpha());
+ ASSERT_EQ(node_lrn->beta(), cloned_lrn->beta());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLog *)
+{
+ return _graph->nodes()->create<luci::CircleLog>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Log)
+{
+ auto g = loco::make_graph();
+ auto node_log = g->nodes()->create<luci::CircleLog>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_log, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_log = dynamic_cast<luci::CircleLog *>(cloned);
+ ASSERT_NE(nullptr, cloned_log);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLogSoftmax *)
+{
+ return _graph->nodes()->create<luci::CircleLogSoftmax>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LogSoftmax)
+{
+ auto g = loco::make_graph();
+ auto node_logs = g->nodes()->create<luci::CircleLogSoftmax>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_logs, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_logs = dynamic_cast<luci::CircleLogSoftmax *>(cloned);
+ ASSERT_NE(nullptr, cloned_logs);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLogicalAnd *)
+{
+ return _graph->nodes()->create<luci::CircleLogicalAnd>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LogicalAnd)
+{
+ auto g = loco::make_graph();
+ auto node_logand = g->nodes()->create<luci::CircleLogicalAnd>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_logand, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_logand = dynamic_cast<luci::CircleLogicalAnd *>(cloned);
+ ASSERT_NE(nullptr, cloned_logand);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLogicalNot *)
+{
+ return _graph->nodes()->create<luci::CircleLogicalNot>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LogicalNot)
+{
+ auto g = loco::make_graph();
+ auto node_lognot = g->nodes()->create<luci::CircleLogicalNot>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_lognot, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_lognot = dynamic_cast<luci::CircleLogicalNot *>(cloned);
+ ASSERT_NE(nullptr, cloned_lognot);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLogicalOr *)
+{
+ return _graph->nodes()->create<luci::CircleLogicalOr>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LogicalOr)
+{
+ auto g = loco::make_graph();
+ auto node_logor = g->nodes()->create<luci::CircleLogicalOr>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_logor, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_logor = dynamic_cast<luci::CircleLogicalOr *>(cloned);
+ ASSERT_NE(nullptr, cloned_logor);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLogistic *)
+{
+ return _graph->nodes()->create<luci::CircleLogistic>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Logistic)
+{
+ auto g = loco::make_graph();
+ auto node_log = g->nodes()->create<luci::CircleLogistic>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_log, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_log = dynamic_cast<luci::CircleLogistic *>(cloned);
+ ASSERT_NE(nullptr, cloned_log);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleMatrixDiag *)
+{
+ return _graph->nodes()->create<luci::CircleMatrixDiag>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_MatrixDiag)
+{
+ auto g = loco::make_graph();
+ auto node_md = g->nodes()->create<luci::CircleMatrixDiag>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_md, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_md = dynamic_cast<luci::CircleMatrixDiag *>(cloned);
+ ASSERT_NE(nullptr, cloned_md);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleMatrixSetDiag *)
+{
+ return _graph->nodes()->create<luci::CircleMatrixSetDiag>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_MatrixSetDiag)
+{
+ auto g = loco::make_graph();
+ auto node_msd = g->nodes()->create<luci::CircleMatrixSetDiag>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_msd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_msd = dynamic_cast<luci::CircleMatrixSetDiag *>(cloned);
+ ASSERT_NE(nullptr, cloned_msd);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleMaxPool2D *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleMaxPool2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->filter()->h(node->filter()->h());
+ cloned->filter()->w(node->filter()->w());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_MaxPool2D)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMaxPool2D>();
+ node_mp->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_mp->padding(luci::Padding::SAME);
+ node_mp->filter()->h(1);
+ node_mp->filter()->w(2);
+ node_mp->stride()->h(3);
+ node_mp->stride()->w(4);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_mp = dynamic_cast<luci::CircleMaxPool2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_mp);
+ ASSERT_EQ(node_mp->fusedActivationFunction(), cloned_mp->fusedActivationFunction());
+ ASSERT_EQ(node_mp->padding(), cloned_mp->padding());
+ ASSERT_EQ(node_mp->filter()->h(), cloned_mp->filter()->h());
+ ASSERT_EQ(node_mp->filter()->w(), cloned_mp->filter()->w());
+ ASSERT_EQ(node_mp->stride()->h(), cloned_mp->stride()->h());
+ ASSERT_EQ(node_mp->stride()->w(), cloned_mp->stride()->w());
+}
+
+TEST(CloneNodeTest, clone_MaxPool2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMaxPool2D>();
+ node_mp->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_mp->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_MaxPool2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMaxPool2D>();
+ node_mp->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_mp->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleMaximum *)
+{
+ return _graph->nodes()->create<luci::CircleMaximum>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Maximum)
+{
+ auto g = loco::make_graph();
+ auto node_max = g->nodes()->create<luci::CircleMaximum>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_max, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_max = dynamic_cast<luci::CircleMaximum *>(cloned);
+ ASSERT_NE(nullptr, cloned_max);
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"

namespace luci
{

-ShapeSignature ssinf::Algorithm::visit(const luci::CircleMean *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleMean *node)
{
- return legalized_signature(
- reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+ auto *cloned = _graph->nodes()->create<luci::CircleMean>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
}

} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Mean)
+{
+ auto g = loco::make_graph();
+ auto node_mean = g->nodes()->create<luci::CircleMean>();
+ node_mean->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mean, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_mean = dynamic_cast<luci::CircleMean *>(cloned);
+ ASSERT_NE(nullptr, cloned_mean);
+ ASSERT_EQ(node_mean->keep_dims(), cloned_mean->keep_dims());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleMinimum *)
+{
+ return _graph->nodes()->create<luci::CircleMinimum>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Minimum)
+{
+ auto g = loco::make_graph();
+ auto node_min = g->nodes()->create<luci::CircleMinimum>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_min, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_min = dynamic_cast<luci::CircleMinimum *>(cloned);
+ ASSERT_NE(nullptr, cloned_min);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleMirrorPad *node)
+{
+ if (node->mode() == luci::MirrorPadMode::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleMirrorPad>();
+ if (cloned != nullptr)
+ cloned->mode(node->mode());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_MirrorPad)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMirrorPad>();
+ node_mp->mode(luci::MirrorPadMode::REFLECT);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_mp = dynamic_cast<luci::CircleMirrorPad *>(cloned);
+ ASSERT_NE(nullptr, cloned_mp);
+ ASSERT_EQ(node_mp->mode(), cloned_mp->mode());
+}
+
+TEST(CloneNodeTest, clone_MirrorPad_mode_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMirrorPad>();
+ node_mp->mode(luci::MirrorPadMode::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleMul *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleMul>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Mul)
+{
+ auto g = loco::make_graph();
+ auto node_mul = g->nodes()->create<luci::CircleMul>();
+ node_mul->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mul, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_mul = dynamic_cast<luci::CircleMul *>(cloned);
+ ASSERT_NE(nullptr, cloned_mul);
+ ASSERT_EQ(node_mul->fusedActivationFunction(), cloned_mul->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_Mul_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_mul = g->nodes()->create<luci::CircleMul>();
+ node_mul->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mul, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNeg *)
+{
+ return _graph->nodes()->create<luci::CircleNeg>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Neg)
+{
+ auto g = loco::make_graph();
+ auto node_neg = g->nodes()->create<luci::CircleNeg>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_neg, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_neg = dynamic_cast<luci::CircleNeg *>(cloned);
+ ASSERT_NE(nullptr, cloned_neg);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNonMaxSuppressionV4 *)
+{
+ return _graph->nodes()->create<luci::CircleNonMaxSuppressionV4>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NonMaxSuppressionV4)
+{
+ auto g = loco::make_graph();
+ auto node_nms = g->nodes()->create<luci::CircleNonMaxSuppressionV4>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_nms, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_nms = dynamic_cast<luci::CircleNonMaxSuppressionV4 *>(cloned);
+ ASSERT_NE(nullptr, cloned_nms);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNonMaxSuppressionV4Out *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleNonMaxSuppressionV4Out>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NonMaxSuppressionV4Out)
+{
+ auto g = loco::make_graph();
+ auto node_nout = g->nodes()->create<luci::CircleNonMaxSuppressionV4Out>();
+ node_nout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_nout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_nout = dynamic_cast<luci::CircleNonMaxSuppressionV4Out *>(cloned);
+ ASSERT_NE(nullptr, cloned_nout);
+ ASSERT_EQ(node_nout->index(), cloned_nout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNonMaxSuppressionV5 *)
+{
+ return _graph->nodes()->create<luci::CircleNonMaxSuppressionV5>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NonMaxSuppressionV5)
+{
+ auto g = loco::make_graph();
+ auto node_nms = g->nodes()->create<luci::CircleNonMaxSuppressionV5>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_nms, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_nms = dynamic_cast<luci::CircleNonMaxSuppressionV5 *>(cloned);
+ ASSERT_NE(nullptr, cloned_nms);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNonMaxSuppressionV5Out *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleNonMaxSuppressionV5Out>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NonMaxSuppressionV5Out)
+{
+ auto g = loco::make_graph();
+ auto node_nout = g->nodes()->create<luci::CircleNonMaxSuppressionV5Out>();
+ node_nout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_nout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_nout = dynamic_cast<luci::CircleNonMaxSuppressionV5Out *>(cloned);
+ ASSERT_NE(nullptr, cloned_nout);
+ ASSERT_EQ(node_nout->index(), cloned_nout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNotEqual *)
+{
+ return _graph->nodes()->create<luci::CircleNotEqual>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NotEqual)
+{
+ auto g = loco::make_graph();
+ auto node_ne = g->nodes()->create<luci::CircleNotEqual>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ne, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ne = dynamic_cast<luci::CircleNotEqual *>(cloned);
+ ASSERT_NE(nullptr, cloned_ne);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleOneHot *node)
+{
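+  // OneHot's only node-local attribute is axis; copy it to the clone.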
+ auto *cloned = _graph->nodes()->create<luci::CircleOneHot>();
+ if (cloned != nullptr)
+ cloned->axis(node->axis());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_OneHot)
+{
+ auto g = loco::make_graph();
+ auto node_oh = g->nodes()->create<luci::CircleOneHot>();
+ node_oh->axis(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_oh, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_oh = dynamic_cast<luci::CircleOneHot *>(cloned);
+ ASSERT_NE(nullptr, cloned_oh);
+ ASSERT_EQ(node_oh->axis(), cloned_oh->axis());
+}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <luci/Service/CircleShapeSignatureInference.h>
-
-namespace luci
-{
-
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutput *node)
-{
- return input_arg_signature(node, 0);
-}
-
-} // namespace luci
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutputDummy *) { return ShapeSignature(); }
+luci::CircleNode *CloneNode::visit(const luci::CircleOutputDummy *)
+{
+ return _graph->nodes()->create<luci::CircleOutputDummy>();
+}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_OutputDummy)
+{
+ auto g = loco::make_graph();
+ auto node_dummy = g->nodes()->create<luci::CircleOutputDummy>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dummy, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_dummy = dynamic_cast<luci::CircleOutputDummy *>(cloned);
+ ASSERT_NE(nullptr, cloned_dummy);
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutputExclude *)
+luci::CircleNode *CloneNode::visit(const luci::CircleOutputExclude *)
{
- return ShapeSignature();
+ return _graph->nodes()->create<luci::CircleOutputExclude>();
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_OutputExclude)
+{
+ auto g = loco::make_graph();
+ auto node_outex = g->nodes()->create<luci::CircleOutputExclude>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_outex, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_outex = dynamic_cast<luci::CircleOutputExclude *>(cloned);
+ ASSERT_NE(nullptr, cloned_outex);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CirclePRelu *)
+{
+ return _graph->nodes()->create<luci::CirclePRelu>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_PRelu)
+{
+ auto g = loco::make_graph();
+ auto node_pr = g->nodes()->create<luci::CirclePRelu>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pr, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pr = dynamic_cast<luci::CirclePRelu *>(cloned);
+ ASSERT_NE(nullptr, cloned_pr);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CirclePack *node)
+{
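+  // CirclePack needs its input count at construction time; axis is copied as a plain attribute.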
+ auto *cloned = _graph->nodes()->create<luci::CirclePack>(node->values_count());
+ if (cloned != nullptr)
+ cloned->axis(node->axis());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Pack)
+{
+ auto g = loco::make_graph();
+ auto node_pack = g->nodes()->create<luci::CirclePack>(3);
+ node_pack->axis(7);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pack, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pack = dynamic_cast<luci::CirclePack *>(cloned);
+ ASSERT_NE(nullptr, cloned_pack);
+ ASSERT_EQ(node_pack->values_count(), cloned_pack->values_count());
+ ASSERT_EQ(node_pack->axis(), cloned_pack->axis());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CirclePad *)
+{
+ return _graph->nodes()->create<luci::CirclePad>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Pad)
+{
+ auto g = loco::make_graph();
+ auto node_pad = g->nodes()->create<luci::CirclePad>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pad, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pad = dynamic_cast<luci::CirclePad *>(cloned);
+ ASSERT_NE(nullptr, cloned_pad);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CirclePadV2 *)
+{
+ return _graph->nodes()->create<luci::CirclePadV2>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_PadV2)
+{
+ auto g = loco::make_graph();
+ auto node_pad = g->nodes()->create<luci::CirclePadV2>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pad, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pad = dynamic_cast<luci::CirclePadV2 *>(cloned);
+ ASSERT_NE(nullptr, cloned_pad);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CirclePow *)
+{
+ return _graph->nodes()->create<luci::CirclePow>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Pow)
+{
+ auto g = loco::make_graph();
+ auto node_pow = g->nodes()->create<luci::CirclePow>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pow, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pow = dynamic_cast<luci::CirclePow *>(cloned);
+ ASSERT_NE(nullptr, cloned_pow);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleRange *)
+{
+ return _graph->nodes()->create<luci::CircleRange>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Range)
+{
+ auto g = loco::make_graph();
+ auto node_range = g->nodes()->create<luci::CircleRange>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_range, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_range = dynamic_cast<luci::CircleRange *>(cloned);
+ ASSERT_NE(nullptr, cloned_range);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleRank *)
+{
+ return _graph->nodes()->create<luci::CircleRank>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Rank)
+{
+ auto g = loco::make_graph();
+ auto node_rank = g->nodes()->create<luci::CircleRank>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rank, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rank = dynamic_cast<luci::CircleRank *>(cloned);
+ ASSERT_NE(nullptr, cloned_rank);
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceAny *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleReduceAny *node)
{
- return legalized_signature(
- reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
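+  // keep_dims is the only attribute to preserve when cloning the reduction node.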
+ auto *cloned = _graph->nodes()->create<luci::CircleReduceAny>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReduceAny)
+{
+ auto g = loco::make_graph();
+ auto node_ra = g->nodes()->create<luci::CircleReduceAny>();
+ node_ra->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ra, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ra = dynamic_cast<luci::CircleReduceAny *>(cloned);
+ ASSERT_NE(nullptr, cloned_ra);
+ ASSERT_EQ(node_ra->keep_dims(), cloned_ra->keep_dims());
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceMax *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleReduceMax *node)
{
- return legalized_signature(
- reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+ auto *cloned = _graph->nodes()->create<luci::CircleReduceMax>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReduceMax)
+{
+ auto g = loco::make_graph();
+ auto node_rmax = g->nodes()->create<luci::CircleReduceMax>();
+ node_rmax->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rmax, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rmax = dynamic_cast<luci::CircleReduceMax *>(cloned);
+ ASSERT_NE(nullptr, cloned_rmax);
+ ASSERT_EQ(node_rmax->keep_dims(), cloned_rmax->keep_dims());
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceMin *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleReduceMin *node)
{
- return legalized_signature(
- reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+ auto *cloned = _graph->nodes()->create<luci::CircleReduceMin>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReduceMin)
+{
+ auto g = loco::make_graph();
+ auto node_rmin = g->nodes()->create<luci::CircleReduceMin>();
+ node_rmin->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rmin, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rmin = dynamic_cast<luci::CircleReduceMin *>(cloned);
+ ASSERT_NE(nullptr, cloned_rmin);
+ ASSERT_EQ(node_rmin->keep_dims(), cloned_rmin->keep_dims());
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceProd *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleReduceProd *node)
{
- return legalized_signature(
- reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+ auto *cloned = _graph->nodes()->create<luci::CircleReduceProd>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReduceProd)
+{
+ auto g = loco::make_graph();
+ auto node_rp = g->nodes()->create<luci::CircleReduceProd>();
+ node_rp->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rp = dynamic_cast<luci::CircleReduceProd *>(cloned);
+ ASSERT_NE(nullptr, cloned_rp);
+ ASSERT_EQ(node_rp->keep_dims(), cloned_rp->keep_dims());
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleRelu *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleRelu *)
{
- return input_arg_signature(node, 0);
+ return _graph->nodes()->create<luci::CircleRelu>();
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+#include <luci/Service/CircleTypeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, simple_relu)
+{
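+  // Relu is shape-preserving, so the inferred shape must equal the 3x4 input shape.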
+ luci::CircleInput input;
+ luci::CircleRelu relu;
+
+ input.shape({3, 4});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ relu.features(&input);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&relu, shape));
+ ASSERT_EQ(2, shape.rank());
+ ASSERT_EQ(3, shape.dim(0).value());
+ ASSERT_EQ(4, shape.dim(1).value());
+}
+
+TEST(DataTypeRuleTest, simple_relu)
+{
+ luci::CircleInput input;
+ luci::CircleRelu relu;
+
+ input.dtype(loco::DataType::S32);
+
+ relu.features(&input);
+
+ loco::DataType dtype;
+ luci::tinf::Rule type_inf_rule;
+
+ ASSERT_TRUE(type_inf_rule.infer(&relu, dtype));
+ ASSERT_EQ(loco::DataType::S32, dtype);
+}
+
+TEST(CloneNodeTest, clone_Relu)
+{
+ auto g = loco::make_graph();
+ auto node_relu = g->nodes()->create<luci::CircleRelu>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_relu, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_relu = dynamic_cast<luci::CircleRelu *>(cloned);
+ ASSERT_NE(nullptr, cloned_relu);
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleRelu6 *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleRelu6 *)
{
- return input_arg_signature(node, 0);
+ return _graph->nodes()->create<luci::CircleRelu6>();
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Relu6)
+{
+ auto g = loco::make_graph();
+ auto node_relu6 = g->nodes()->create<luci::CircleRelu6>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_relu6, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_relu6 = dynamic_cast<luci::CircleRelu6 *>(cloned);
+ ASSERT_NE(nullptr, cloned_relu6);
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleReluN1To1 *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleReluN1To1 *)
{
- return input_arg_signature(node, 0);
+ return _graph->nodes()->create<luci::CircleReluN1To1>();
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReluN1To1)
+{
+ auto g = loco::make_graph();
+ auto node_relun1 = g->nodes()->create<luci::CircleReluN1To1>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_relun1, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_relun1 = dynamic_cast<luci::CircleReluN1To1 *>(cloned);
+ ASSERT_NE(nullptr, cloned_relun1);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleReshape *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleReshape>();
+ if (cloned != nullptr)
+ {
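+    // Replicate the compile-time shape attribute: same rank and the same value for each dimension.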
+ uint32_t rank = node->newShape()->rank();
+ cloned->newShape()->rank(rank);
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ cloned->newShape()->dim(r) = node->newShape()->dim(r);
+ }
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Reshape)
+{
+ auto g = loco::make_graph();
+ auto node_reshape = g->nodes()->create<luci::CircleReshape>();
+ node_reshape->newShape()->rank(2);
+ node_reshape->newShape()->dim(0) = 3;
+ node_reshape->newShape()->dim(1) = 4;
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_reshape, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_reshape = dynamic_cast<luci::CircleReshape *>(cloned);
+ ASSERT_NE(nullptr, cloned_reshape);
+ ASSERT_EQ(node_reshape->newShape()->rank(), cloned_reshape->newShape()->rank());
+ ASSERT_EQ(node_reshape->newShape()->dim(0), cloned_reshape->newShape()->dim(0));
+ ASSERT_EQ(node_reshape->newShape()->dim(1), cloned_reshape->newShape()->dim(1));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleResizeBilinear *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleResizeBilinear>();
+ if (cloned != nullptr)
+ {
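+    // align_corners and half_pixel_centers are plain attributes; both must survive the clone.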
+ cloned->align_corners(node->align_corners());
+ cloned->half_pixel_centers(node->half_pixel_centers());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, resize_bilinear_simple)
+{
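+  // The const size input (16x16) determines the output height and width; batch and channels follow the input.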
+ luci::CircleInput input;
+ luci::CircleConst rb_size;
+ luci::CircleResizeBilinear rb;
+
+ input.shape({1, 4, 4, 3});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ rb_size.dtype(loco::DataType::S32);
+ rb_size.rank(1);
+ rb_size.dim(0).set(2);
+ rb_size.size<loco::DataType::S32>(2);
+ rb_size.at<loco::DataType::S32>(0) = 16;
+ rb_size.at<loco::DataType::S32>(1) = 16;
+ rb_size.shape_status(luci::ShapeStatus::VALID);
+
+ rb.input(&input);
+ rb.size(&rb_size);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&rb, shape));
+ ASSERT_EQ(4, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(16, shape.dim(1).value());
+ ASSERT_EQ(16, shape.dim(2).value());
+ ASSERT_EQ(3, shape.dim(3).value());
+}
+
+TEST(CloneNodeTest, clone_ResizeBilinear)
+{
+ auto g = loco::make_graph();
+ auto node_rb = g->nodes()->create<luci::CircleResizeBilinear>();
+ node_rb->align_corners(true);
+ node_rb->half_pixel_centers(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rb, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rb = dynamic_cast<luci::CircleResizeBilinear *>(cloned);
+ ASSERT_NE(nullptr, cloned_rb);
+ ASSERT_EQ(node_rb->align_corners(), cloned_rb->align_corners());
+ ASSERT_EQ(node_rb->half_pixel_centers(), cloned_rb->half_pixel_centers());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleResizeNearestNeighbor *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleResizeNearestNeighbor>();
+ if (cloned != nullptr)
+ cloned->align_corners(node->align_corners());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, resize_nearest_neighbor_simple)
+{
+ luci::CircleInput input;
+ luci::CircleConst rnn_size;
+ luci::CircleResizeNearestNeighbor rnn;
+
+ input.shape({1, 4, 4, 3});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ rnn_size.dtype(loco::DataType::S32);
+ rnn_size.rank(1);
+ rnn_size.dim(0).set(2);
+ rnn_size.size<loco::DataType::S32>(2);
+ rnn_size.at<loco::DataType::S32>(0) = 16;
+ rnn_size.at<loco::DataType::S32>(1) = 16;
+ rnn_size.shape_status(luci::ShapeStatus::VALID);
+
+ rnn.input(&input);
+ rnn.size(&rnn_size);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&rnn, shape));
+ ASSERT_EQ(4, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(16, shape.dim(1).value());
+ ASSERT_EQ(16, shape.dim(2).value());
+ ASSERT_EQ(3, shape.dim(3).value());
+}
+
+TEST(CloneNodeTest, clone_ResizeNearestNeighbor)
+{
+ auto g = loco::make_graph();
+ auto node_rnn = g->nodes()->create<luci::CircleResizeNearestNeighbor>();
+ node_rnn->align_corners(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rnn, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rnn = dynamic_cast<luci::CircleResizeNearestNeighbor *>(cloned);
+ ASSERT_NE(nullptr, cloned_rnn);
+ ASSERT_EQ(node_rnn->align_corners(), cloned_rnn->align_corners());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleReverseSequence *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleReverseSequence>();
+ if (cloned != nullptr)
+ {
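+    // Preserve both axis attributes of ReverseSequence on the clone.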
+ cloned->seq_axis(node->seq_axis());
+ cloned->batch_axis(node->batch_axis());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReverseSequence)
+{
+ auto g = loco::make_graph();
+ auto node_rs = g->nodes()->create<luci::CircleReverseSequence>();
+ node_rs->seq_axis(1);
+ node_rs->batch_axis(2);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rs, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rs = dynamic_cast<luci::CircleReverseSequence *>(cloned);
+ ASSERT_NE(nullptr, cloned_rs);
+ ASSERT_EQ(node_rs->seq_axis(), cloned_rs->seq_axis());
+ ASSERT_EQ(node_rs->batch_axis(), cloned_rs->batch_axis());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleReverseV2 *)
+{
+ return _graph->nodes()->create<luci::CircleReverseV2>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReverseV2)
+{
+ auto g = loco::make_graph();
+ auto node_rev = g->nodes()->create<luci::CircleReverseV2>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rev, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rev = dynamic_cast<luci::CircleReverseV2 *>(cloned);
+ ASSERT_NE(nullptr, cloned_rev);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleRound *)
+{
+ return _graph->nodes()->create<luci::CircleRound>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Round)
+{
+ auto g = loco::make_graph();
+ auto node_rnd = g->nodes()->create<luci::CircleRound>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rnd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rnd = dynamic_cast<luci::CircleRound *>(cloned);
+ ASSERT_NE(nullptr, cloned_rnd);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleRsqrt *)
+{
+ return _graph->nodes()->create<luci::CircleRsqrt>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Rsqrt)
+{
+ auto g = loco::make_graph();
+ auto node_rsqrt = g->nodes()->create<luci::CircleRsqrt>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rsqrt, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rsqrt = dynamic_cast<luci::CircleRsqrt *>(cloned);
+ ASSERT_NE(nullptr, cloned_rsqrt);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleScatterNd *)
+{
+ return _graph->nodes()->create<luci::CircleScatterNd>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ScatterNd)
+{
+ auto g = loco::make_graph();
+ auto node_snd = g->nodes()->create<luci::CircleScatterNd>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_snd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_snd = dynamic_cast<luci::CircleScatterNd *>(cloned);
+ ASSERT_NE(nullptr, cloned_snd);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSegmentSum *)
+{
+ return _graph->nodes()->create<luci::CircleSegmentSum>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SegmentSum)
+{
+ auto g = loco::make_graph();
+ auto node_ss = g->nodes()->create<luci::CircleSegmentSum>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ss, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ss = dynamic_cast<luci::CircleSegmentSum *>(cloned);
+ ASSERT_NE(nullptr, cloned_ss);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSelect *)
+{
+ return _graph->nodes()->create<luci::CircleSelect>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Select)
+{
+ auto g = loco::make_graph();
+ auto node_sel = g->nodes()->create<luci::CircleSelect>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sel, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sel = dynamic_cast<luci::CircleSelect *>(cloned);
+ ASSERT_NE(nullptr, cloned_sel);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSelectV2 *)
+{
+ return _graph->nodes()->create<luci::CircleSelectV2>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SelectV2)
+{
+ auto g = loco::make_graph();
+ auto node_sel = g->nodes()->create<luci::CircleSelectV2>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sel, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sel = dynamic_cast<luci::CircleSelectV2 *>(cloned);
+ ASSERT_NE(nullptr, cloned_sel);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleShape *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleShape>();
+ if (cloned != nullptr)
+ cloned->out_type(node->out_type());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Shape)
+{
+ auto g = loco::make_graph();
+ auto node_shape = g->nodes()->create<luci::CircleShape>();
+ node_shape->out_type(loco::DataType::S32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_shape, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_shape = dynamic_cast<luci::CircleShape *>(cloned);
+ ASSERT_NE(nullptr, cloned_shape);
+ ASSERT_EQ(node_shape->out_type(), cloned_shape->out_type());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSin *)
+{
+ return _graph->nodes()->create<luci::CircleSin>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Sin)
+{
+ auto g = loco::make_graph();
+ auto node_sin = g->nodes()->create<luci::CircleSin>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sin, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sin = dynamic_cast<luci::CircleSin *>(cloned);
+ ASSERT_NE(nullptr, cloned_sin);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSlice *)
+{
+ return _graph->nodes()->create<luci::CircleSlice>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Slice)
+{
+ auto g = loco::make_graph();
+ auto node_slice = g->nodes()->create<luci::CircleSlice>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_slice, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_slice = dynamic_cast<luci::CircleSlice *>(cloned);
+ ASSERT_NE(nullptr, cloned_slice);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSoftmax *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSoftmax>();
+ if (cloned != nullptr)
+ cloned->beta(node->beta());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Softmax)
+{
+ auto g = loco::make_graph();
+ auto node_sm = g->nodes()->create<luci::CircleSoftmax>();
+ node_sm->beta(2.3f);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sm, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sm = dynamic_cast<luci::CircleSoftmax *>(cloned);
+ ASSERT_NE(nullptr, cloned_sm);
+ ASSERT_EQ(node_sm->beta(), cloned_sm->beta());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSpaceToBatchND *)
+{
+ return _graph->nodes()->create<luci::CircleSpaceToBatchND>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SpaceToBatchND)
+{
+ auto g = loco::make_graph();
+ auto node_s2bnd = g->nodes()->create<luci::CircleSpaceToBatchND>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_s2bnd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_s2bnd = dynamic_cast<luci::CircleSpaceToBatchND *>(cloned);
+ ASSERT_NE(nullptr, cloned_s2bnd);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSpaceToDepth *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSpaceToDepth>();
+ if (cloned != nullptr)
+ cloned->block_size(node->block_size());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SpaceToDepth)
+{
+ auto g = loco::make_graph();
+ auto node_s2d = g->nodes()->create<luci::CircleSpaceToDepth>();
+ node_s2d->block_size(32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_s2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_s2d = dynamic_cast<luci::CircleSpaceToDepth *>(cloned);
+ ASSERT_NE(nullptr, cloned_s2d);
+ ASSERT_EQ(node_s2d->block_size(), cloned_s2d->block_size());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSparseToDense *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSparseToDense>();
+ if (cloned != nullptr)
+ cloned->validate_indices(node->validate_indices());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SparseToDense)
+{
+ auto g = loco::make_graph();
+ auto node_s2d = g->nodes()->create<luci::CircleSparseToDense>();
+ node_s2d->validate_indices(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_s2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_s2d = dynamic_cast<luci::CircleSparseToDense *>(cloned);
+ ASSERT_NE(nullptr, cloned_s2d);
+ ASSERT_EQ(node_s2d->validate_indices(), cloned_s2d->validate_indices());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSplit *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSplit>();
+ if (cloned != nullptr)
+ cloned->num_split(node->num_split());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Split)
+{
+ auto g = loco::make_graph();
+ auto node_split = g->nodes()->create<luci::CircleSplit>();
+ node_split->num_split(5);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_split, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_split = dynamic_cast<luci::CircleSplit *>(cloned);
+ ASSERT_NE(nullptr, cloned_split);
+ ASSERT_EQ(node_split->num_split(), cloned_split->num_split());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSplitOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSplitOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SplitOut)
+{
+ auto g = loco::make_graph();
+ auto node_sout = g->nodes()->create<luci::CircleSplitOut>();
+ node_sout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sout = dynamic_cast<luci::CircleSplitOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_sout);
+ ASSERT_EQ(node_sout->index(), cloned_sout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSplitV *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSplitV>();
+ if (cloned != nullptr)
+ cloned->num_split(node->num_split());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SplitV)
+{
+ auto g = loco::make_graph();
+ auto node_split = g->nodes()->create<luci::CircleSplitV>();
+ node_split->num_split(5);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_split, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_split = dynamic_cast<luci::CircleSplitV *>(cloned);
+ ASSERT_NE(nullptr, cloned_split);
+ ASSERT_EQ(node_split->num_split(), cloned_split->num_split());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSplitVOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSplitVOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SplitVOut)
+{
+ auto g = loco::make_graph();
+ auto node_sout = g->nodes()->create<luci::CircleSplitVOut>();
+ node_sout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sout = dynamic_cast<luci::CircleSplitVOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_sout);
+ ASSERT_EQ(node_sout->index(), cloned_sout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSqrt *)
+{
+ return _graph->nodes()->create<luci::CircleSqrt>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Sqrt)
+{
+ auto g = loco::make_graph();
+ auto node_sqrt = g->nodes()->create<luci::CircleSqrt>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sqrt, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sqrt = dynamic_cast<luci::CircleSqrt *>(cloned);
+ ASSERT_NE(nullptr, cloned_sqrt);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSquare *)
+{
+ return _graph->nodes()->create<luci::CircleSquare>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Square)
+{
+ auto g = loco::make_graph();
+ auto node_squ = g->nodes()->create<luci::CircleSquare>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_squ, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_squ = dynamic_cast<luci::CircleSquare *>(cloned);
+ ASSERT_NE(nullptr, cloned_squ);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSquaredDifference *)
+{
+ return _graph->nodes()->create<luci::CircleSquaredDifference>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SquaredDifference)
+{
+ auto g = loco::make_graph();
+ auto node_sd = g->nodes()->create<luci::CircleSquaredDifference>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sd = dynamic_cast<luci::CircleSquaredDifference *>(cloned);
+ ASSERT_NE(nullptr, cloned_sd);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSqueeze *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSqueeze>();
+ if (cloned != nullptr)
+ cloned->squeeze_dims(node->squeeze_dims());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, squeeze_simple)
+{
+ luci::CircleInput input;
+ luci::CircleSqueeze squeeze;
+
+ input.shape({1, 4, 3, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ squeeze.input(&input);
+ squeeze.squeeze_dims({0});
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
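+ // Only axis 0 (size 1) is squeezed, so {1, 4, 3, 1} becomes {4, 3, 1}.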
+ ASSERT_TRUE(shape_inf_rule.infer(&squeeze, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(4, shape.dim(0).value());
+ ASSERT_EQ(3, shape.dim(1).value());
+ ASSERT_EQ(1, shape.dim(2).value());
+}
+
+TEST(ShapeRuleTest, squeeze_all)
+{
+ luci::CircleInput input;
+ luci::CircleSqueeze squeeze;
+
+ input.shape({1, 4, 3, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ squeeze.input(&input);
+ squeeze.squeeze_dims({});
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
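+ // An empty squeeze_dims removes every dimension of size 1, so {1, 4, 3, 1} becomes {4, 3}.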
+ ASSERT_TRUE(shape_inf_rule.infer(&squeeze, shape));
+ ASSERT_EQ(2, shape.rank());
+ ASSERT_EQ(4, shape.dim(0).value());
+ ASSERT_EQ(3, shape.dim(1).value());
+}
+
+TEST(CloneNodeTest, clone_Squeeze)
+{
+ auto g = loco::make_graph();
+ auto node_squ = g->nodes()->create<luci::CircleSqueeze>();
+ node_squ->squeeze_dims({2, 3});
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_squ, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_squ = dynamic_cast<luci::CircleSqueeze *>(cloned);
+ ASSERT_NE(nullptr, cloned_squ);
+ ASSERT_EQ(node_squ->squeeze_dims().size(), cloned_squ->squeeze_dims().size());
+ for (size_t s = 0; s < node_squ->squeeze_dims().size(); ++s)
+ ASSERT_EQ(node_squ->squeeze_dims().at(s), cloned_squ->squeeze_dims().at(s));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleStridedSlice *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleStridedSlice>();
+ if (cloned != nullptr)
+ {
+ cloned->begin_mask(node->begin_mask());
+ cloned->end_mask(node->end_mask());
+ cloned->ellipsis_mask(node->ellipsis_mask());
+ cloned->new_axis_mask(node->new_axis_mask());
+ cloned->shrink_axis_mask(node->shrink_axis_mask());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_StridedSlice)
+{
+ auto g = loco::make_graph();
+ auto node_ss = g->nodes()->create<luci::CircleStridedSlice>();
+ node_ss->begin_mask(1);
+ node_ss->end_mask(2);
+ node_ss->ellipsis_mask(3);
+ node_ss->new_axis_mask(4);
+ node_ss->shrink_axis_mask(5);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ss, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ss = dynamic_cast<luci::CircleStridedSlice *>(cloned);
+ ASSERT_NE(nullptr, cloned_ss);
+ ASSERT_EQ(node_ss->begin_mask(), cloned_ss->begin_mask());
+ ASSERT_EQ(node_ss->end_mask(), cloned_ss->end_mask());
+ ASSERT_EQ(node_ss->ellipsis_mask(), cloned_ss->ellipsis_mask());
+ ASSERT_EQ(node_ss->new_axis_mask(), cloned_ss->new_axis_mask());
+ ASSERT_EQ(node_ss->shrink_axis_mask(), cloned_ss->shrink_axis_mask());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSub *node)
+{
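+ // A Sub with an UNDEFINED fused activation function is not cloned; the clone_Sub_NEG test covers this case.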
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleSub>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Sub)
+{
+ auto g = loco::make_graph();
+ auto node_sub = g->nodes()->create<luci::CircleSub>();
+ node_sub->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sub, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sub = dynamic_cast<luci::CircleSub *>(cloned);
+ ASSERT_NE(nullptr, cloned_sub);
+ ASSERT_EQ(node_sub->fusedActivationFunction(), cloned_sub->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_Sub_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_sub = g->nodes()->create<luci::CircleSub>();
+ node_sub->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sub, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
 */

-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"

 namespace luci
 {

-ShapeSignature ssinf::Algorithm::visit(const luci::CircleSum *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleSum *node)
 {
- return legalized_signature(
- reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+ auto *cloned = _graph->nodes()->create<luci::CircleSum>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
 }

 } // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Sum)
+{
+ auto g = loco::make_graph();
+ auto node_sum = g->nodes()->create<luci::CircleSum>();
+ node_sum->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sum, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sum = dynamic_cast<luci::CircleSum *>(cloned);
+ ASSERT_NE(nullptr, cloned_sum);
+ ASSERT_EQ(node_sum->keep_dims(), cloned_sum->keep_dims());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleTanh *)
+{
+ return _graph->nodes()->create<luci::CircleTanh>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Tanh)
+{
+ auto g = loco::make_graph();
+ auto node_tanh = g->nodes()->create<luci::CircleTanh>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_tanh, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_tanh = dynamic_cast<luci::CircleTanh *>(cloned);
+ ASSERT_NE(nullptr, cloned_tanh);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleTile *)
+{
+ return _graph->nodes()->create<luci::CircleTile>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Tile)
+{
+ auto g = loco::make_graph();
+ auto node_tile = g->nodes()->create<luci::CircleTile>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_tile, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_tile = dynamic_cast<luci::CircleTile *>(cloned);
+ ASSERT_NE(nullptr, cloned_tile);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleTopKV2 *)
+{
+ return _graph->nodes()->create<luci::CircleTopKV2>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_TopKV2)
+{
+ auto g = loco::make_graph();
+ auto node_top = g->nodes()->create<luci::CircleTopKV2>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_top, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_top = dynamic_cast<luci::CircleTopKV2 *>(cloned);
+ ASSERT_NE(nullptr, cloned_top);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleTopKV2Out *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleTopKV2Out>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_TopKV2Out)
+{
+ auto g = loco::make_graph();
+ auto node_tout = g->nodes()->create<luci::CircleTopKV2Out>();
+ node_tout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_tout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_tout = dynamic_cast<luci::CircleTopKV2Out *>(cloned);
+ ASSERT_NE(nullptr, cloned_tout);
+ ASSERT_EQ(node_tout->index(), cloned_tout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleTranspose *)
+{
+ return _graph->nodes()->create<luci::CircleTranspose>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, transpose_simple)
+{
+ luci::CircleInput input;
+ luci::CircleConst perm;
+ luci::CircleTranspose transpose;
+
+ input.shape({3, 8, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ perm.dtype(loco::DataType::S32);
+ perm.rank(1);
+ perm.dim(0).set(3);
+ perm.size<loco::DataType::S32>(3);
+ perm.at<loco::DataType::S32>(0) = 1;
+ perm.at<loco::DataType::S32>(1) = 2;
+ perm.at<loco::DataType::S32>(2) = 0;
+ perm.shape_status(luci::ShapeStatus::VALID);
+
+ transpose.a(&input);
+ transpose.perm(&perm);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&transpose, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(8, shape.dim(0).value());
+ ASSERT_EQ(1, shape.dim(1).value());
+ ASSERT_EQ(3, shape.dim(2).value());
+}
+
+TEST(CloneNodeTest, clone_Transpose)
+{
+ auto g = loco::make_graph();
+ auto node_tr = g->nodes()->create<luci::CircleTranspose>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_tr, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_tr = dynamic_cast<luci::CircleTranspose *>(cloned);
+ ASSERT_NE(nullptr, cloned_tr);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleTransposeConv *node)
+{
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleTransposeConv>();
+ if (cloned != nullptr)
+ {
+ cloned->padding(node->padding());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_TransposeConv)
+{
+ auto g = loco::make_graph();
+ auto node_trconv = g->nodes()->create<luci::CircleTransposeConv>();
+ node_trconv->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_trconv, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_trconv = dynamic_cast<luci::CircleTransposeConv *>(cloned);
+ ASSERT_NE(nullptr, cloned_trconv);
+ ASSERT_EQ(node_trconv->padding(), cloned_trconv->padding());
+}
+
+TEST(CloneNodeTest, clone_TransposeConv_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_trconv = g->nodes()->create<luci::CircleTransposeConv>();
+ node_trconv->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_trconv, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleUnidirectionalSequenceLSTM *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleUnidirectionalSequenceLSTM>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->cell_clip(node->cell_clip());
+ cloned->proj_clip(node->proj_clip());
+ cloned->time_major(node->time_major());
+ cloned->asymmetric_quantize_inputs(node->asymmetric_quantize_inputs());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_UnidirectionalSequenceLSTM)
+{
+ auto g = loco::make_graph();
+ auto node_uslstm = g->nodes()->create<luci::CircleUnidirectionalSequenceLSTM>();
+ node_uslstm->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_uslstm->cell_clip(1.1f);
+ node_uslstm->proj_clip(2.2f);
+ node_uslstm->time_major(true);
+ node_uslstm->asymmetric_quantize_inputs(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uslstm, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_uslstm = dynamic_cast<luci::CircleUnidirectionalSequenceLSTM *>(cloned);
+ ASSERT_NE(nullptr, cloned_uslstm);
+ ASSERT_EQ(node_uslstm->fusedActivationFunction(), cloned_uslstm->fusedActivationFunction());
+ ASSERT_EQ(node_uslstm->cell_clip(), cloned_uslstm->cell_clip());
+ ASSERT_EQ(node_uslstm->proj_clip(), cloned_uslstm->proj_clip());
+ ASSERT_EQ(node_uslstm->time_major(), cloned_uslstm->time_major());
+ ASSERT_EQ(node_uslstm->asymmetric_quantize_inputs(), cloned_uslstm->asymmetric_quantize_inputs());
+}
+
+TEST(CloneNodeTest, clone_UnidirectionalSequenceLSTM_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_uslstm = g->nodes()->create<luci::CircleUnidirectionalSequenceLSTM>();
+ node_uslstm->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uslstm, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleUnique *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleUnique>();
+ if (cloned != nullptr)
+ cloned->idx_out_type(node->idx_out_type());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Unique)
+{
+ auto g = loco::make_graph();
+ auto node_uniq = g->nodes()->create<luci::CircleUnique>();
+ node_uniq->idx_out_type(loco::DataType::S32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uniq, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_uniq = dynamic_cast<luci::CircleUnique *>(cloned);
+ ASSERT_NE(nullptr, cloned_uniq);
+ ASSERT_EQ(node_uniq->idx_out_type(), cloned_uniq->idx_out_type());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleUniqueOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleUniqueOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_UniqueOut)
+{
+ auto g = loco::make_graph();
+ auto node_uout = g->nodes()->create<luci::CircleUniqueOut>();
+ node_uout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_uout = dynamic_cast<luci::CircleUniqueOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_uout);
+ ASSERT_EQ(node_uout->index(), cloned_uout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleUnpack *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleUnpack>();
+ if (cloned != nullptr)
+ {
+ cloned->num(node->num());
+ cloned->axis(node->axis());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Unpack)
+{
+ auto g = loco::make_graph();
+ auto node_unp = g->nodes()->create<luci::CircleUnpack>();
+ node_unp->num(1);
+ node_unp->axis(2);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_unp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_unp = dynamic_cast<luci::CircleUnpack *>(cloned);
+ ASSERT_NE(nullptr, cloned_unp);
+ ASSERT_EQ(node_unp->num(), cloned_unp->num());
+ ASSERT_EQ(node_unp->axis(), cloned_unp->axis());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleUnpackOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleUnpackOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_UnpackOut)
+{
+ auto g = loco::make_graph();
+ auto node_uout = g->nodes()->create<luci::CircleUnpackOut>();
+ node_uout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_uout = dynamic_cast<luci::CircleUnpackOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_uout);
+ ASSERT_EQ(node_uout->index(), cloned_uout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleWhere *)
+{
+ return _graph->nodes()->create<luci::CircleWhere>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Where)
+{
+ auto g = loco::make_graph();
+ auto node_wh = g->nodes()->create<luci::CircleWhere>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_wh, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_wh = dynamic_cast<luci::CircleWhere *>(cloned);
+ ASSERT_NE(nullptr, cloned_wh);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleZerosLike *)
+{
+ return _graph->nodes()->create<luci::CircleZerosLike>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ZerosLike)
+{
+ auto g = loco::make_graph();
+ auto node_zl = g->nodes()->create<luci::CircleZerosLike>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_zl, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_zl = dynamic_cast<luci::CircleZerosLike *>(cloned);
+ ASSERT_NE(nullptr, cloned_zl);
+}
res._dims.resize(circle_node->rank());
for (uint32_t i = 0; i < circle_node->rank(); ++i)
- res._dims.at(i) = circle_node->dim(i).value();
+ res._dims.at(i) = circle_node->dim(i).known() ? circle_node->dim(i).value() : -1;
return res;
}
return res;
}
-ShapeDescription to_shape_description(const loco::FeatureShape &shape)
-{
- ShapeDescription res;
-
- res._rank_known = true;
-
- // T/F Lite encodes a feature map as a NHWC tensor
- res._dims.resize(4);
- res._dims.at(0) = shape.count().value();
- res._dims.at(1) = shape.height().value();
- res._dims.at(2) = shape.width().value();
- res._dims.at(3) = shape.depth().value();
-
- return res;
-}
-
-ShapeDescription to_shape_description(const loco::FilterShape &shape)
-{
- ShapeDescription res;
-
- res._rank_known = true;
-
- // T/F Lite encodes a convolution filter as a NHWC tensor
- res._dims.resize(4);
- res._dims.at(0) = shape.count().value();
- res._dims.at(1) = shape.height().value();
- res._dims.at(2) = shape.width().value();
- res._dims.at(3) = shape.depth().value();
-
- return res;
-}
-
-ShapeDescription to_shape_description(const loco::DepthwiseFilterShape &shape)
-{
- ShapeDescription res;
-
- res._rank_known = true;
-
- // T/F Lite encodes a depthwise convolution filter as a [1, H, W, C*M] tensor
- res._dims.resize(4);
- res._dims.at(0) = 1;
- res._dims.at(1) = shape.height().value();
- res._dims.at(2) = shape.width().value();
- res._dims.at(3) = shape.depth().value() * shape.multiplier().value();
-
- return res;
-}
-
-ShapeDescription to_shape_description(const loco::BiasShape &shape)
-{
- ShapeDescription res;
-
- res._rank_known = true;
-
- res._dims.resize(1);
- res._dims.at(0) = shape.length().value();
-
- return res;
-}
-
-ShapeDescription to_shape_description(const loco::MatrixShape &shape)
-{
- ShapeDescription res;
-
- res._rank_known = true;
-
- res._dims.resize(2);
- res._dims.at(0) = shape.height().value();
- res._dims.at(1) = shape.width().value();
-
- return res;
-}
-
ShapeDescription to_shape_description(const loco::NodeShape &shape)
{
switch (shape.domain())
{
case loco::Domain::Tensor:
return to_shape_description(shape.as<loco::TensorShape>());
- case loco::Domain::Feature:
- return to_shape_description(shape.as<loco::FeatureShape>());
- case loco::Domain::Filter:
- return to_shape_description(shape.as<loco::FilterShape>());
- case loco::Domain::DepthwiseFilter:
- return to_shape_description(shape.as<loco::DepthwiseFilterShape>());
- case loco::Domain::Bias:
- return to_shape_description(shape.as<loco::BiasShape>());
- case loco::Domain::Matrix:
- return to_shape_description(shape.as<loco::MatrixShape>());
default:
break;
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/ShapeDescription.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/Nodes/CircleConst.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeDescriptionTest, CircleNode)
+{
+ // Use CircleConst as CircleNode
+ luci::CircleConst circle_const;
+ circle_const.shape({1, 2, 3, 4});
+
+ auto sd = luci::to_shape_description(&circle_const);
+
+ ASSERT_EQ(4, sd._dims.size());
+ ASSERT_EQ(1, sd._dims.at(0));
+ ASSERT_TRUE(sd._rank_known);
+}
+
+TEST(ShapeDescriptionTest, TensorShape)
+{
+ loco::TensorShape tensor_shape{1, 2, 3, 4};
+ loco::NodeShape node_shape(tensor_shape);
+
+ auto sd = luci::to_shape_description(node_shape);
+
+ ASSERT_EQ(4, sd._dims.size());
+ ASSERT_EQ(1, sd._dims.at(0));
+ ASSERT_TRUE(sd._rank_known);
+}
+
+TEST(ShapeDescriptionTest, BiasShape_NEG)
+{
+ loco::BiasShape bias_shape;
+ bias_shape.length() = 1;
+ loco::NodeShape node_shape(bias_shape);
+
+ EXPECT_THROW(luci::to_shape_description(node_shape), std::exception);
+}
#include "ShapeInfer_StridedSlice.h"
#include "Check.h"
+#include "CircleShapeInferenceHelper.h"
#include <luci/IR/CircleNode.h>
#include <loco/IR/DataType.h>
#include <loco/IR/NodeShape.h>
#include <oops/InternalExn.h>
-#include <loco/Service/ShapeInference.h>
#include <cmath>
#include <cstdint>
assert(node->new_axis_mask() == 0);
auto op_params = BuildStridedSliceParams(node);
- loco::TensorShape input_shape = loco::shape_get(input_node).as<loco::TensorShape>();
+ loco::TensorShape input_shape = luci::shape_get(input_node).as<loco::TensorShape>();
uint32_t num_input_axes = input_shape.rank();
assert(begin_node->size<S32>() <= num_input_axes);
#include <luci/Log.h>
#include <loco/IR/NodeShape.h>
-#include <loco/Service/ShapeInference.h>
-#include <loco/Service/TypeInference.h>
#include <cassert>
+#include <unordered_map>
#include <vector>
namespace
{
if (r)
os << ",";
- os << tensor_shape.dim(r).value();
+
+ if (tensor_shape.dim(r).known())
+ os << tensor_shape.dim(r).value();
+ else
+ os << "?";
}
os << "]";
return os;
{
if (r)
os << ",";
- os << circle_node->dim(r).value();
+
+ if (circle_node->dim(r).known())
+ os << circle_node->dim(r).value();
+ else
+ os << "?";
}
os << "]";
return os;
auto go_tensor_shape = graph_out->shape();
assert(go_tensor_shape);
+ // NOTE Even if the shape of a graph output is [] (which means "shape inference was impossible")
+ //      while the shape of the CircleNode is not, it can still be a valid case because the
+ //      shape inference algorithm for CircleNode may have been upgraded since then. The opposite
+ //      is also possible. If such cases appear, the following validation code should be fixed.
bool is_shape_valid = (circle_node->rank() == go_tensor_shape->rank());
for (uint32_t i = 0; is_shape_valid && i < circle_node->rank(); ++i)
- if (circle_node->dim(i).value() != go_tensor_shape->dim(i).value())
+ {
+ if (!circle_node->dim(i).known() || !go_tensor_shape->dim(i).known())
+ {
+ // If at least one of two dimensions is unknown,
+ // the unknown dimension can accept any value.
+ INFO(l) << "Unknown dimension is matched with known dimension" << std::endl;
+ }
+ else if (circle_node->dim(i).value() != go_tensor_shape->dim(i).value())
+ {
is_shape_valid = false;
+ }
+ }
if (is_shape_valid == false)
{
return true;
}
-bool validate_shape_signature(loco::Graph *g)
-{
- LOGGER(l);
-
- for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
- {
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- const auto shape_signature = circle_node->shape_signature();
+} // namespace
- if (shape_signature.rank() == 0)
- continue;
+namespace luci
+{
- // Rank of shape and shape signature should be same
- if (circle_node->rank() != shape_signature.rank())
- {
- INFO(l) << "[luci] Rank of shape signature for " << circle_node->name() << " do not match"
- << std::endl;
- return false;
- }
+bool validate(loco::Graph *g)
+{
+ if (!loco::valid(g))
+ return false;
- bool has_unknown = false;
+ if (!validate_shape_dtype(g))
+ return false;
- // If shape siganture is not -1, dimension value should be same
- for (uint32_t d = 0; d < shape_signature.rank(); ++d)
- {
- if (shape_signature.dim(d) != -1 &&
- shape_signature.dim(d) != (int32_t)(circle_node->dim(d).value()))
- {
- INFO(l) << "[luci] Dimension " << d << "of shape signature for " << circle_node->name()
- << " do not match" << std::endl;
- return false;
- }
+ // TODO add more validation
- if (shape_signature.dim(d) == -1)
- has_unknown = true;
- }
+ return true;
+}
- // Shape signature should have at least one -1 value.
- if (!has_unknown)
- {
- INFO(l) << "[luci] Shape signature in " << circle_node->name()
- << " do not have unknown dimension" << std::endl;
+bool validate_name(loco::Graph *g)
+{
+ auto nodes = g->nodes();
+ for (uint32_t n = 0; n < nodes->size(); ++n)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(nodes->at(n));
+ auto name = node->name();
+ if (name.empty())
return false;
- }
}
return true;
}
-} // namespace
-
-namespace luci
+bool validate_unique_name(luci::Module *m)
{
+ std::unordered_map<std::string, bool> names_col;
-bool validate(loco::Graph *g)
-{
- if (!loco::valid(g))
- return false;
-
- if (!validate_shape_dtype(g))
- return false;
-
- if (!validate_shape_signature(g))
- return false;
+ for (size_t g = 0; g < m->size(); ++g)
+ {
+ auto graph = m->graph(g);
+ auto nodes = graph->nodes();
+ for (uint32_t n = 0; n < nodes->size(); ++n)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(nodes->at(n));
+ // skip CircleOutput as it may have the same name as its from() node
+ auto output = dynamic_cast<luci::CircleOutput *>(node);
+ if (output != nullptr)
+ continue;
+
+ auto name = node->name();
+ auto it = names_col.find(name);
+ if (it != names_col.end())
+ return false;
- // TODO add more validation
+ names_col[name] = true;
+ }
+ }
return true;
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/Validate.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleAdd.h>
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->dtype(loco::DataType::S32);
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class SqrtGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ SqrtGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ SqrtGraphlet::init(g(), shape);
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+
+ // give the output the same name as _sqrt: CircleOutput may share a name with its from() node
+ output()->name(_sqrt->name());
+ }
+};
+
+class Sqrt2xGraphlet
+{
+public:
+ Sqrt2xGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt1 = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt1->dtype(loco::DataType::S32);
+ _sqrt1->name("sqrt");
+
+ _sqrt2 = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt2->dtype(loco::DataType::S32);
+ _sqrt2->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt1 = nullptr;
+ luci::CircleSqrt *_sqrt2 = nullptr;
+};
+
+class Sqrt2xGraph : public TestIOGraph, public Sqrt2xGraphlet
+{
+public:
+ Sqrt2xGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ Sqrt2xGraphlet::init(g(), shape);
+
+ _sqrt1->x(input());
+
+ _sqrt2->x(_sqrt1);
+
+ output()->from(_sqrt2);
+ }
+};
+
+} // namespace
+
+TEST(ValidateTest, non_empty_name)
+{
+ SqrtGraph g;
+ g.init({3, 3});
+
+ ASSERT_TRUE(luci::validate_name(g.g()));
+}
+
+TEST(ValidateTest, unique_name)
+{
+ luci::Module module;
+
+ SqrtGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ ASSERT_TRUE(luci::validate_unique_name(&module));
+}
+
+TEST(ValidateTest, unique_name_NEG)
+{
+ luci::Module module;
+
+ Sqrt2xGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ ASSERT_FALSE(luci::validate_unique_name(&module));
+}
set(SRCS_READ_TESTER
src/ReadTester.cpp
+ src/ReadModule.cpp
)
add_executable(luci_readtester "${SRCS_READ_TESTER}")
set(SRCS_WRITE_TESTER
src/WriteTester.cpp
+ src/ReadModule.cpp
)
add_executable(luci_writetester "${SRCS_WRITE_TESTER}")
target_link_libraries(luci_writetester PRIVATE foder)
target_link_libraries(luci_writetester PRIVATE oops)
target_link_libraries(luci_writetester PRIVATE safemain)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_readtester_test src/ReadTester.test.cpp ${SRCS_READ_TESTER})
+target_link_libraries(luci_readtester_test luci_import)
+target_link_libraries(luci_readtester_test luci_service)
+target_link_libraries(luci_readtester_test luci_pass)
+target_link_libraries(luci_readtester_test foder)
+
+GTest_AddTest(luci_writetester_test src/WriteTester.test.cpp ${SRCS_WRITE_TESTER})
+target_link_libraries(luci_writetester_test luci_import)
+target_link_libraries(luci_writetester_test luci_service)
+target_link_libraries(luci_writetester_test luci_pass)
+target_link_libraries(luci_writetester_test luci_export)
+target_link_libraries(luci_writetester_test foder)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReadModule.h"
+
+#include <luci/Pass/CircleShapeInferencePass.h>
+#include <luci/Pass/CircleTypeInferencePass.h>
+#include <luci/Service/Validate.h>
+
+#include <logo/Phase.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+std::unique_ptr<luci::Module> ReadModule(std::string &input_path)
+{
+ // Load model from the file
+ foder::FileLoader file_loader{input_path};
+ std::vector<char> model_data = file_loader.load();
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+ if (circle_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ return nullptr;
+ }
+
+ luci::Importer importer;
+ auto module = importer.importModule(circle_model);
+ assert(module->size() > 0);
+
+ for (size_t g = 0; g < module->size(); ++g)
+ {
+ auto graph = module->graph(g);
+ if (graph == nullptr)
+ return nullptr;
+
+ {
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{graph};
+ phase_runner.run(phase);
+ }
+
+ if (!luci::validate(graph))
+ return nullptr;
+ }
+ return module;
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_TESTER_READ_MODULE_H__
+#define __LUCI_TESTER_READ_MODULE_H__
+
+#include <luci/Importer.h>
+#include <foder/FileLoader.h>
+
+#include <memory>
+#include <string>
+
+std::unique_ptr<luci::Module> ReadModule(std::string &input_path);
+
+#endif // __LUCI_TESTER_READ_MODULE_H__
* limitations under the License.
*/
-#include <foder/FileLoader.h>
-
-#include <luci/Importer.h>
-#include <luci/Service/Validate.h>
-#include <luci/Pass/ShapeInferencePass.h>
-#include <luci/Pass/TypeInferencePass.h>
-
-// Following passes will be removed after refactoring is finished
-#include <luci/Pass/MigrateLegacyShapeDtypePass.h>
+#include "ReadModule.h"
#include <iostream>
-#include <map>
#include <string>
namespace
std::cout << "[INFO] Circle is '" << input_path << "'" << std::endl;
- // Load model from the file
- foder::FileLoader file_loader{input_path};
- std::vector<char> model_data = file_loader.load();
- const circle::Model *circle_model = circle::GetModel(model_data.data());
- if (circle_model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ auto module = ReadModule(input_path);
+ if (module == nullptr)
return EXIT_FAILURE;
- }
-
- luci::Importer importer;
- auto module = importer.importModule(circle_model);
- assert(module->size() > 0);
- for (size_t g = 0; g < module->size(); ++g)
- {
- auto graph = module->graph(g);
- if (graph == nullptr)
- return 255;
-
- {
- luci::ShapeInferencePass pass;
- while (pass.run(graph) == true)
- ;
- }
- {
- luci::TypeInferencePass pass;
- while (pass.run(graph) == true)
- ;
- }
- {
- // This pass will be removed after refactoring is finished
- luci::MigrateLegacyShapeDtypePass pass;
- while (pass.run(graph) == true)
- ;
- }
-
- if (!luci::validate(graph))
- return 255;
- }
return 0;
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstring>
+
+#include <gtest/gtest.h>
+
+// From ReadTester.cpp
+int entry(int argc, char **argv);
+
+TEST(ReadTesterTest, invalid_argc_NEG)
+{
+ char argv_1[20];
+ strcpy(argv_1, "ReadTesterTest");
+
+ int argc = 1;
+ char *argv[] = {argv_1};
+
+ ASSERT_NE(0, entry(argc, argv));
+}
+
+TEST(ReadTesterTest, invalid_file_NEG)
+{
+ char argv_1[20], argv_2[20];
+ strcpy(argv_1, "ReadTesterTest");
+ strcpy(argv_2, "not_a_file");
+
+ int argc = 2;
+ char *argv[] = {argv_1, argv_2};
+
+ EXPECT_THROW(entry(argc, argv), std::runtime_error);
+}
* limitations under the License.
*/
-#include <foder/FileLoader.h>
+#include "ReadModule.h"
-#include <luci/Importer.h>
-#include <luci/Pass/ShapeInferencePass.h>
-#include <luci/Pass/TypeInferencePass.h>
-#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
#include <oops/InternalExn.h>
-// Following passes will be removed after refactoring is finished
-#include <luci/Pass/MigrateLegacyShapeDtypePass.h>
-
#include <fstream>
#include <iostream>
-#include <map>
#include <string>
namespace
{
public:
CircleExpContract(loco::Graph *graph, const std::string &filename)
- : _graph(graph), _filepath(filename)
+ : _graph(graph), _filepath(filename)
{
// NOTHING TO DO
}
CircleExpContract(luci::Module *module, const std::string &filename)
- : _module(module), _filepath(filename)
+ : _module(module), _filepath(filename)
{
// NOTHING TO DO
}
std::cout << "[INFO] Circle from '" << input_path << "' to '" << output_path << "'" << std::endl;
- // Load model from the file
- foder::FileLoader file_loader{input_path};
- std::vector<char> model_data = file_loader.load();
- const circle::Model *circle_model = circle::GetModel(model_data.data());
- if (circle_model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ auto module = ReadModule(input_path);
+ if (module == nullptr)
return EXIT_FAILURE;
- }
-
- // Import from input Circle file
- luci::Importer importer;
- auto module = importer.importModule(circle_model);
- assert(module->size() > 0);
-
- for (size_t g = 0; g < module->size(); ++g)
- {
- auto graph = module->graph(g);
- if (graph == nullptr)
- return 255;
-
- {
- luci::ShapeInferencePass pass;
- while (pass.run(graph) == true)
- ;
- }
- {
- luci::TypeInferencePass pass;
- while (pass.run(graph) == true)
- ;
- }
- {
- // This pass will be removed after refactoring is finished
- luci::MigrateLegacyShapeDtypePass pass;
- while (pass.run(graph) == true)
- ;
- }
-
- if (!luci::validate(graph))
- return 255;
- }
// Export to output Circle file
luci::CircleExporter exporter;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstring>
+
+#include <gtest/gtest.h>
+
+// From WriteTester.cpp
+int entry(int argc, char **argv);
+
+TEST(WriteTesterTest, invalid_argc_NEG)
+{
+ char argv_1[20];
+ strcpy(argv_1, "WriteTesterTest");
+
+ int argc = 1;
+ char *argv[] = {argv_1};
+
+ ASSERT_NE(0, entry(argc, argv));
+}
+
+TEST(WriteTesterTest, invalid_file_NEG)
+{
+ char argv_1[20], argv_2[20], argv_3[20];
+ strcpy(argv_1, "WriteTesterTest");
+ strcpy(argv_2, "not_a_file");
+ strcpy(argv_3, "not_a_file");
+
+ int argc = 3;
+ char *argv[] = {argv_1, argv_2, argv_3};
+
+ EXPECT_THROW(entry(argc, argv), std::runtime_error);
+}
--- /dev/null
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+# NOTE we use the "*.test.cpp" suffix so that these sources are NOT picked up by static analyzer tools
+
+# testhelper library itself
+set(HELPER_SOURCE
+ src/TestShape.test.cpp
+ )
+
+add_library(luci_testhelper STATIC ${HELPER_SOURCE})
+target_include_directories(luci_testhelper PRIVATE src)
+target_include_directories(luci_testhelper PUBLIC include)
+target_link_libraries(luci_testhelper luci_lang)
+
+# test for testhelper library
+set(TESTER_SOURCE
+ src/TestIOGraph.test.cpp
+ )
+
+GTest_AddTest(luci_testhelper_test ${TESTER_SOURCE})
+target_link_libraries(luci_testhelper_test luci_testhelper)
--- /dev/null
+# luci-testhelper
+
+_luci-testhelper_ provides helper classes for unit testing.
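+
+Below is a minimal usage sketch showing how these helpers might be combined in a
+GTest case. The `SqrtTestGraph` class and the test name are illustrative only;
+`TestIOGraph`, `ShapeU32`, and `luci::CircleSqrt` are the types used elsewhere in
+this change.
+
+```cpp
+#include "luci/test/TestIOGraph.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+// Illustrative graph: CircleInput -> CircleSqrt -> CircleOutput
+class SqrtTestGraph : public luci::test::TestIOGraph
+{
+public:
+  void init(const luci::test::ShapeU32 shape)
+  {
+    // create graph-level input/output nodes with the given shape
+    TestIOGraph::init(shape, shape);
+
+    _sqrt = g()->nodes()->create<luci::CircleSqrt>();
+    _sqrt->name("sqrt");
+    _sqrt->x(input());      // wire input -> sqrt
+    output()->from(_sqrt);  // wire sqrt -> output
+  }
+
+protected:
+  luci::CircleSqrt *_sqrt = nullptr;
+};
+
+} // namespace
+
+TEST(LuciTestHelperExample, build_small_graph)
+{
+  SqrtTestGraph g;
+  g.init({2, 3});
+
+  ASSERT_NE(nullptr, g.g());
+  ASSERT_EQ(1, g.num_inputs());
+}
+```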
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_TESTHELPER_TEST_IO_GRAPH_H__
+#define __LUCI_TESTHELPER_TEST_IO_GRAPH_H__
+
+#include "TestShape.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <stdexcept>
+
+namespace luci
+{
+namespace test
+{
+
+/**
+ * @brief Graphlet with Inputs and loco::Graph for multiple inputs
+ * @note Every Graph will have Input(s) and Output(s)
+ *        We keep loco::Graph only in TestIsGraphlet to avoid declaring a
+ *        separate class just for loco::Graph
+ */
+template <unsigned N> class TestIsGraphlet
+{
+public:
+ TestIsGraphlet()
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_inputs[n] = nullptr;
+ _inputs[n] = nullptr;
+ }
+ _g = loco::make_graph();
+ }
+
+public:
+ virtual void init(loco::Graph *g, const std::initializer_list<ShapeU32> shape_in)
+ {
+ if (shape_in.size() != N)
+ throw std::runtime_error("Failed to init TestIsGraphlet");
+
+ auto shpin = shape_in.begin();
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_inputs[n] = g->inputs()->create();
+
+ _inputs[n] = g->nodes()->create<luci::CircleInput>();
+ _inputs[n]->shape(*shpin);
+ _inputs[n]->shape_status(luci::ShapeStatus::VALID);
+ _inputs[n]->dtype(loco::DataType::FLOAT32);
+ _inputs[n]->name("input_" + std::to_string(n));
+
+ _inputs[n]->index(_graph_inputs[n]->index());
+
+ auto input_shape = std::make_unique<loco::TensorShape>();
+ set_shape_vector(input_shape.get(), *shpin);
+ _graph_inputs[n]->shape(std::move(input_shape));
+ _graph_inputs[n]->dtype(loco::DataType::FLOAT32);
+
+ shpin++;
+ }
+ }
+
+public:
+ loco::Graph *g(void) { return _g.get(); }
+ luci::CircleInput *input(int idx) { return _inputs[idx]; }
+ uint32_t num_inputs(void) { return N; }
+
+public:
+ void transfer_to(luci::Module *module)
+ {
+ // WARNING: after _g is transferred, _graph_inputs, _inputs
+ // and _graph_outputs, _outputs in TestOsGraphlet will be invalid.
+ // The arrays are not cleared, as these are just helpers for unit tests.
+ module->add(std::move(_g));
+ }
+
+protected:
+ std::unique_ptr<loco::Graph> _g;
+ std::array<loco::GraphInput *, N> _graph_inputs;
+ std::array<luci::CircleInput *, N> _inputs;
+};
+
+/**
+ * @brief Graphlet with one Input
+ */
+class TestIGraphlet : public TestIsGraphlet<1>
+{
+public:
+ virtual void init(loco::Graph *g, const ShapeU32 shape_in)
+ {
+ TestIsGraphlet<1>::init(g, {shape_in});
+ }
+
+ luci::CircleInput *input() { return _inputs[0]; }
+};
+
+/**
+ * @brief Graphlet with Outputs for multiple outputs
+ */
+template <unsigned N> class TestOsGraphlet
+{
+public:
+ TestOsGraphlet()
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_outputs[n] = nullptr;
+ _outputs[n] = nullptr;
+ }
+ }
+
+public:
+ virtual void init(loco::Graph *g, const std::initializer_list<ShapeU32> shape_out)
+ {
+ if (shape_out.size() != N)
+ throw std::runtime_error("Failed to init TestOsGraphlet");
+
+ auto shpout = shape_out.begin();
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_outputs[n] = g->outputs()->create();
+
+ _outputs[n] = g->nodes()->create<luci::CircleOutput>();
+ _outputs[n]->shape(*shpout);
+ _outputs[n]->shape_status(luci::ShapeStatus::VALID);
+ _outputs[n]->dtype(loco::DataType::FLOAT32);
+ _outputs[n]->name("output_" + std::to_string(n));
+
+ _outputs[n]->index(_graph_outputs[n]->index());
+
+ auto output_shape = std::make_unique<loco::TensorShape>();
+ set_shape_vector(output_shape.get(), *shpout);
+ _graph_outputs[n]->shape(std::move(output_shape));
+ _graph_outputs[n]->dtype(loco::DataType::FLOAT32);
+
+ shpout++;
+ }
+ }
+
+public:
+ luci::CircleOutput *output(int idx) { return _outputs[idx]; }
+
+protected:
+ std::array<loco::GraphOutput *, N> _graph_outputs;
+ std::array<luci::CircleOutput *, N> _outputs;
+};
+
+/**
+ * @brief Graphlet with one Output
+ */
+class TestOGraphlet : public TestOsGraphlet<1>
+{
+public:
+ virtual void init(loco::Graph *g, const ShapeU32 shape_out)
+ {
+ TestOsGraphlet<1>::init(g, {shape_out});
+ }
+
+ luci::CircleOutput *output() { return _outputs[0]; }
+};
+
+/**
+ * @brief Graph with Input and Output
+ */
+class TestIOGraph : public TestIGraphlet, public TestOGraphlet
+{
+public:
+ TestIOGraph() = default;
+
+public:
+ virtual void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIGraphlet::init(g(), shape_in);
+ TestOGraphlet::init(g(), shape_out);
+ }
+};
+
+} // namespace test
+} // namespace luci
+
+#endif // __LUCI_TESTHELPER_TEST_IO_GRAPH_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_TESTHELPER_TEST_SHAPE_H__
+#define __LUCI_TESTHELPER_TEST_SHAPE_H__
+
+#include <luci/IR/CircleNode.h>
+
+#include <initializer_list>
+
+namespace luci
+{
+namespace test
+{
+
+using ShapeU32 = std::initializer_list<uint32_t>;
+using ShapeI32 = std::initializer_list<int32_t>;
+
+void set_shape_vector(loco::TensorShape *shape, const ShapeU32 &values);
+void set_shape_vector(luci::CircleConst *const_node, const ShapeI32 &values);
+
+uint32_t num_elements(const ShapeU32 shape);
+
+} // namespace test
+} // namespace luci
+
+#endif // __LUCI_TESTHELPER_TEST_SHAPE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/test/TestIOGraph.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class AddGraphlet
+{
+public:
+ AddGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _add = g->nodes()->create<luci::CircleAdd>();
+ _add->name("add");
+ }
+
+protected:
+ luci::CircleAdd *_add = nullptr;
+};
+
+class ConvGraphlet
+{
+public:
+ ConvGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _conv = g->nodes()->create<luci::CircleConv2D>();
+ _conv->name("conv");
+ }
+
+protected:
+ luci::CircleConv2D *_conv = nullptr;
+};
+
+} // namespace
+
+namespace
+{
+
+class TestOfTestIOGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ TestOfTestIOGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ SqrtGraphlet::init(g());
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+ }
+};
+
+class TestOfTestI2OGraph : public TestIsGraphlet<2>, public TestOGraphlet, public AddGraphlet
+{
+public:
+ TestOfTestI2OGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIsGraphlet<2>::init(g(), {{2, 3}, {2, 3}});
+ TestOsGraphlet<1>::init(g(), {{2, 3}});
+ AddGraphlet::init(g());
+
+ _add->x(input(0));
+ _add->y(input(1));
+
+ output()->from(_add);
+ }
+};
+
+class TestOfTestI3OGraph : public TestIsGraphlet<3>, public TestOGraphlet, public ConvGraphlet
+{
+public:
+ TestOfTestI3OGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIsGraphlet<3>::init(g(), {{2, 3, 3, 4}, {1, 1}, {4}});
+ TestOsGraphlet<1>::init(g(), {{2, 3, 3, 4}});
+ ConvGraphlet::init(g());
+
+ _conv->input(input(0));
+ _conv->filter(input(1));
+ _conv->bias(input(2));
+
+ output()->from(_conv);
+ }
+};
+
+class FailOfTestI3OGraph : public TestIsGraphlet<3>, public TestOGraphlet, public ConvGraphlet
+{
+public:
+ FailOfTestI3OGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIsGraphlet<3>::init(g(), {{2, 3, 3, 4}, {1, 1}});
+ TestOsGraphlet<1>::init(g(), {{2, 3, 3, 4}});
+ ConvGraphlet::init(g());
+
+ _conv->input(input(0));
+ _conv->filter(input(1));
+ _conv->bias(input(2));
+
+ output()->from(_conv);
+ }
+};
+
+} // namespace
+
+TEST(TestIOGraphTest, IOGraph_init)
+{
+ TestOfTestIOGraph tg;
+ tg.init();
+
+ SUCCEED();
+}
+
+TEST(TestIOGraphTest, I2OGraph_init)
+{
+ TestOfTestI2OGraph tg;
+ tg.init();
+
+ SUCCEED();
+}
+
+TEST(TestIOGraphTest, I3OGraph_init)
+{
+ TestOfTestI3OGraph tg;
+ tg.init();
+
+ SUCCEED();
+}
+
+TEST(TestIOGraphTest, I3OGraph_input_number_mismatch_NEG)
+{
+ FailOfTestI3OGraph fg;
+ EXPECT_THROW(fg.init(), std::runtime_error);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/test/TestShape.h"
+
+/**
+ * @note This file does not hold any test cases but provides helper methods for tests
+ */
+
+namespace luci
+{
+namespace test
+{
+
+void set_shape_vector(loco::TensorShape *shape, const ShapeU32 &values)
+{
+ uint32_t r = 0;
+ shape->rank(values.size());
+ for (auto v : values)
+ shape->dim(r++).set(v);
+}
+
+void set_shape_vector(luci::CircleConst *const_node, const ShapeI32 &values)
+{
+ const_node->rank(1);
+ const_node->dim(0).set(values.size());
+ const_node->shape_status(luci::ShapeStatus::VALID);
+ const_node->dtype(loco::DataType::S32);
+ const_node->size<loco::DataType::S32>(values.size());
+ uint32_t idx = 0;
+ for (auto val : values)
+ const_node->at<loco::DataType::S32>(idx++) = val;
+}
+
+uint32_t num_elements(const ShapeU32 shape)
+{
+ uint32_t result = 1;
+ for (auto val : shape)
+ result = result * val;
+ return result;
+}
+
+} // namespace test
+} // namespace luci
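For orientation, here is a minimal usage sketch of the TestShape helpers defined above; the wrapper function name and the literal shape values are illustrative assumptions, not part of this patch:

#include "luci/test/TestShape.h"

#include <cassert>

// Illustrative sketch only: exercise the helpers above on nodes of an existing graph.
void example_shape_helpers(loco::Graph *g)
{
  // rank-1 S32 constant holding the four values {0, 2, 3, 1}
  auto *perm = g->nodes()->create<luci::CircleConst>();
  luci::test::set_shape_vector(perm, {0, 2, 3, 1});

  // num_elements multiplies the dimensions: 2 * 3 * 4 == 24
  assert(luci::test::num_elements({2, 3, 4}) == 24);
}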
addread(ExpandDims_001)
addread(ExpandDims_002)
addread(ExpandDims_003)
+addread(ExpandDims_004)
+addread(FakeQuant_000)
addread(Fill_000)
addread(Fill_001)
addread(Floor_000)
addread(Shape_000)
addread(Sin_000)
addread(Slice_000)
+addread(Slice_001)
addread(Softmax_000)
addread(Softmax_U8_000)
addread(SpaceToBatchND_000)
addread(Square_000)
addread(SquaredDifference_000)
addread(Squeeze_000)
+addread(Squeeze_001)
addread(StridedSlice_000)
addread(StridedSlice_001)
addread(StridedSlice_002)
addwrite(ExpandDims_001)
addwrite(ExpandDims_002)
addwrite(ExpandDims_003)
+addwrite(ExpandDims_004)
+addwrite(FakeQuant_000)
addwrite(Fill_000)
addwrite(Fill_001)
addwrite(Floor_000)
addwrite(Shape_000)
addwrite(Sin_000)
addwrite(Slice_000)
+addwrite(Slice_001)
addwrite(Softmax_000)
addwrite(Softmax_U8_000)
addwrite(SpaceToBatchND_000)
addwrite(Square_000)
addwrite(SquaredDifference_000)
addwrite(Squeeze_000)
+addwrite(Squeeze_001)
addwrite(StridedSlice_000)
addwrite(StridedSlice_001)
addwrite(StridedSlice_002)
const int32_t shifted_lhs_val = lhs_val * (1 << left_shift);
const int32_t shifted_rhs_val = rhs_val * (1 << left_shift);
const int32_t scaled_lhs_val =
- MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_lhs_val, lhs_multiplier, lhs_shift);
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_lhs_val, lhs_multiplier, lhs_shift);
const int32_t scaled_rhs_val =
- MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_rhs_val, rhs_multiplier, rhs_shift);
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_rhs_val, rhs_multiplier, rhs_shift);
const int32_t raw_sum = scaled_lhs_val + scaled_rhs_val;
const int32_t raw_output =
- MultiplyByQuantizedMultiplierSmallerThanOneExp(raw_sum, output_multiplier, output_shift) +
- output_offset;
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(raw_sum, output_multiplier, output_shift) +
+ output_offset;
const int32_t clamped_output = std::min(output_max, std::max(output_min, raw_output));
res_accessor.at(index) = static_cast<uint8_t>(clamped_output);
}
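As context for the quantized-add hunk just above (a pure re-indentation change): the integer-only code approximates a float add under the usual affine quantization convention q = round(real / scale) + zero_point. A hedged float reference follows; the function and parameter names are illustrative and do not appear in this patch.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Illustrative reference only: what the integer path above computes, written with floats.
// The real code uses precomputed quantized multipliers and shifts instead of float scales.
inline uint8_t quantized_add_reference(uint8_t lhs_q, float lhs_scale, int32_t lhs_zero,
                                       uint8_t rhs_q, float rhs_scale, int32_t rhs_zero,
                                       float out_scale, int32_t out_zero)
{
  const float real_sum = lhs_scale * (lhs_q - lhs_zero) + rhs_scale * (rhs_q - rhs_zero);
  const int32_t out_q = static_cast<int32_t>(std::round(real_sum / out_scale)) + out_zero;
  return static_cast<uint8_t>(std::min<int32_t>(255, std::max<int32_t>(0, out_q)));
}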
// Assuming NHWC format.
for (int i = 0; i < num_spatial_dims; ++i)
in_index.at(1 + i) =
- out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+ out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
if (in_range.contains(in_index))
{
// Assuming NHWC format.
for (int i = 0; i < num_spatial_dims; ++i)
in_index.at(1 + i) =
- out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+ out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
if (in_range.contains(in_index))
{
{
auto value = dequantize(arg_accessor.at(index), quant_info);
auto out_value =
- quantize(std::min(std::max(value, 0.0f), cap), result.getType().getQuantization());
+ quantize(std::min(std::max(value, 0.0f), cap), result.getType().getQuantization());
res_accessor.at(index) = out_value;
}
}
};
void ConcatImpl<uint8_t>::run(
- const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs, int axis,
- mir::TensorVariant &output)
+ const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs, int axis,
+ mir::TensorVariant &output)
{
const size_t inputs_count = inputs.size();
std::vector<int32_t> input_zeropoints(inputs_count);
for (int j = 0; j < copy_size; ++j)
{
const int32_t value =
- static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+ static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
}
}
if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
{
const std::int32_t in_offset =
- calcOffset(input_shape, batch, in_y, in_x, in_group_offset + in_c);
- const std::int32_t kernel_offset = calcOffset(
- kernel_shape, out_group_offset + out_c, kernel_y, kernel_x, in_c);
+ calcOffset(input_shape, batch, in_y, in_x, in_group_offset + in_c);
+ const std::int32_t kernel_offset =
+ calcOffset(kernel_shape, out_group_offset + out_c, kernel_y, kernel_x, in_c);
const T input_val = input_data[in_offset];
const T kernel_val = kernel_data[kernel_offset];
sum += kernel_val * input_val;
}
const std::int32_t out_offset =
- calcOffset(output_shape, batch, out_y, out_x, out_group_offset + out_c);
+ calcOffset(output_shape, batch, out_y, out_x, out_group_offset + out_c);
result_data[out_offset] = sum;
}
}
for (int32_t out_c = 0; out_c < num_out_channels; ++out_c)
{
const int32_t kernel_offset =
- calcOffset(kernel_shape, in_c, kernel_y, kernel_x, out_c);
+ calcOffset(kernel_shape, in_c, kernel_y, kernel_x, out_c);
const int32_t output_offset =
- calcOffset(output_shape, batch, out_y, out_x, out_c);
+ calcOffset(output_shape, batch, out_y, out_x, out_c);
const T kernel_val = kernel_data[kernel_offset];
output_data[output_offset] += input_val * kernel_val;
}
for (int32_t inner = 0; inner < inner_size; inner++)
{
output.atOffset((outer * num_indices + i) * inner_size + inner) =
- data.atOffset((outer * axis_size + index) * inner_size + inner);
+ data.atOffset((outer * axis_size + index) * inner_size + inner);
}
}
}
// Assuming NHWC format.
for (int i = 0; i < num_spatial_dims; ++i)
in_index.at(1 + i) =
- out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+ out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
if (in_range.contains(in_index))
{
// Assuming NHWC format.
for (int i = 0; i < num_spatial_dims; ++i)
in_index.at(1 + i) =
- out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+ out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
if (in_range.contains(in_index))
{
int left_shift = shift > 0 ? shift : 0;
int right_shift = shift > 0 ? 0 : -shift;
return RoundingDivideByPOT(
- SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
+ SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
}
inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(int32_t x,
mir::Index expsum_index = res_index;
expsum_index.at(axis) = 0;
res_accessor.at(res_index) =
- std::exp(arg_accessor.at(res_index)) / expsum_accessor.at(expsum_index);
+ std::exp(arg_accessor.at(res_index)) / expsum_accessor.at(expsum_index);
}
}
const float prob_rescaled = table_offset[input_data[j]] * inv_sum_exp;
const int32_t prob_quantized = static_cast<int32_t>(prob_rescaled + 0.5);
output_data[j] =
- static_cast<uint8_t>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
+ static_cast<uint8_t>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
}
input_data += last_dim;
output_data += last_dim;
AffineQuantization() = default;
AffineQuantization(float scale, int zero_point)
- : _scale(scale), _zero_point(zero_point), _empty(false)
+ : _scale(scale), _zero_point(zero_point), _empty(false)
{
}
{
class ShapeIter
- : public std::iterator<std::forward_iterator_tag, Index, std::size_t, Index *, Index &>
+ : public std::iterator<std::forward_iterator_tag, Index, std::size_t, Index *, Index &>
{
public:
ShapeIter &operator++()
}
TensorType(DataType element_type, const Shape &shape, const AffineQuantization &quant)
- : _element_type(element_type), _shape(shape), _quantization(quant)
+ : _element_type(element_type), _shape(shape), _quantization(quant)
{
}
{
public:
AvgPool2DOp(Output *arg, const AvgPool2DOpAttributes &attributes)
- : Operation(Type::avgPool2D, {arg}), _attributes(attributes)
+ : Operation(Type::avgPool2D, {arg}), _attributes(attributes)
{
inferOutputTypes();
}
{
public:
ConcatOp(const std::vector<Output *> &args, int32_t axis)
- : Operation(Type::concat, args), _axis(axis)
+ : Operation(Type::concat, args), _axis(axis)
{
inferOutputTypes();
}
{
public:
Conv2DOp(Output *input, Output *kernel, const Conv2DOpAttributes &attributes)
- : Operation(Type::conv2D, {input, kernel}), _attributes(attributes)
+ : Operation(Type::conv2D, {input, kernel}), _attributes(attributes)
{
inferOutputTypes();
}
Conv2DOp(Output *input, Output *kernel, Output *bias, const Conv2DOpAttributes &attributes)
- : Operation(Type::conv2D, {input, kernel, bias}), _attributes(attributes)
+ : Operation(Type::conv2D, {input, kernel, bias}), _attributes(attributes)
{
inferOutputTypes();
}
{
public:
DeConv2DOp(Output *input, Output *kernel, const Deconv2DOpAttributes &attributes)
- : Operation(Type::deConv2D, {input, kernel}), _attributes(attributes)
+ : Operation(Type::deConv2D, {input, kernel}), _attributes(attributes)
{
inferOutputTypes();
}
DeConv2DOp(Output *input, Output *kernel, const Deconv2DOpAttributes &attributes,
const Shape &output_shape)
- : Operation(Type::deConv2D, {input, kernel}), _attributes(attributes)
+ : Operation(Type::deConv2D, {input, kernel}), _attributes(attributes)
{
assert(input->getElementType() == kernel->getElementType());
setOutputType(0, {input->getElementType(), output_shape});
{
public:
DepthwiseConv2DOp(Output *input, Output *kernel, const Conv2DOpAttributes &attributes)
- : Operation(Type::depthwiseConv, {input, kernel}), _attributes(attributes)
+ : Operation(Type::depthwiseConv, {input, kernel}), _attributes(attributes)
{
inferOutputTypes();
}
DepthwiseConv2DOp(Output *input, Output *kernel, Output *bias,
const Conv2DOpAttributes &attributes)
- : Operation(Type::depthwiseConv, {input, kernel, bias}), _attributes(attributes)
+ : Operation(Type::depthwiseConv, {input, kernel, bias}), _attributes(attributes)
{
inferOutputTypes();
}
{
public:
FullyConnectedOp(Output *input, Output *weights)
- : Operation(Type::fullyConnected, {input, weights})
+ : Operation(Type::fullyConnected, {input, weights})
{
inferOutputTypes();
}
FullyConnectedOp(Output *input, Output *weights, Output *bias)
- : Operation(Type::fullyConnected, {input, weights, bias})
+ : Operation(Type::fullyConnected, {input, weights, bias})
{
inferOutputTypes();
}
{
public:
GatherOp(Output *data, Output *indices, int32_t axis)
- : Operation(Type::gather, {data, indices}), _axis(axis)
+ : Operation(Type::gather, {data, indices}), _axis(axis)
{
inferOutputTypes();
}
{
public:
MaxPool2DOp(Output *arg, const MaxPool2DOpAttributes &attributes)
- : Operation(Type::maxPool2D, {arg}), _attributes(attributes)
+ : Operation(Type::maxPool2D, {arg}), _attributes(attributes)
{
inferOutputTypes();
}
{
public:
PadOp(Output *arg, const PadOpAttributes &attributes)
- : Operation(Type::pad, {arg}), _attributes(attributes)
+ : Operation(Type::pad, {arg}), _attributes(attributes)
{
assert(_attributes.padding_before.size() == _attributes.padding_after.size());
inferOutputTypes();
{
public:
ReduceMeanOp(Output *arg, const std::vector<int> &reduction_dims, bool keep_dims)
- : ReduceOp(Type::reduceMean, arg, reduction_dims, keep_dims)
+ : ReduceOp(Type::reduceMean, arg, reduction_dims, keep_dims)
{
}
{
protected:
ReduceOp(Type type, Output *arg, const std::vector<int> &reduction_dims, bool keep_dims)
- : Operation(type, {arg}), _reduction_dims(reduction_dims), _keep_dims(keep_dims)
+ : Operation(type, {arg}), _reduction_dims(reduction_dims), _keep_dims(keep_dims)
{
inferOutputTypes();
}
};
ResizeOp(Output *arg, ResizeMethod mode, const std::vector<float> &scales)
- : Operation(Type::resizeIm, {arg}), _mode(mode), _scales(scales)
+ : Operation(Type::resizeIm, {arg}), _mode(mode), _scales(scales)
{
// Infer output shape based on given scales.
auto &input_shape = getInputShape(0);
}
ResizeOp(Output *arg, ResizeMethod mode, const Shape &output_shape)
- : Operation(Type::resizeIm, {arg}), _mode(mode)
+ : Operation(Type::resizeIm, {arg}), _mode(mode)
{
// Calculate scales based on given shape.
auto &input_shape = getInputShape(0);
{
public:
SliceOp(Output *arg, const Shape &starts, const Shape &sizes)
- : Operation(Type::slice, {arg}), _starts(starts), _sizes(sizes)
+ : Operation(Type::slice, {arg}), _starts(starts), _sizes(sizes)
{
inferOutputTypes();
}
{
public:
SqueezeOp(Output *arg, const std::vector<std::int32_t> &dims_to_squeeze)
- : Operation(Type::squeeze, {arg}), _dims_to_squeeze(dims_to_squeeze)
+ : Operation(Type::squeeze, {arg}), _dims_to_squeeze(dims_to_squeeze)
{
// Infer output shape.
inferOutputTypes();
if (op->getType() == Operation::Type::input)
_inputs.erase(
- std::remove(_inputs.begin(), _inputs.end(), op)); // NOLINT(bugprone-inaccurate-erase)
+ std::remove(_inputs.begin(), _inputs.end(), op)); // NOLINT(bugprone-inaccurate-erase)
if (op->getType() == Operation::Type::output)
_outputs.erase(
- std::remove(_outputs.begin(), _outputs.end(), op)); // NOLINT(bugprone-inaccurate-erase)
+ std::remove(_outputs.begin(), _outputs.end(), op)); // NOLINT(bugprone-inaccurate-erase)
_ops.erase(op);
delete op;
}
Operation::Operation(Type type, const std::vector<Output *> &inputs, std::size_t num_outputs)
- : _type(type)
+ : _type(type)
{
for (std::size_t i = 0; i < inputs.size(); ++i)
{
for (int i = 0; i < num_dims; ++i)
{
const std::int32_t lhs_dim =
- (i >= num_dims - lhs_shape.rank()) ? lhs_shape.dim(i - (num_dims - lhs_shape.rank())) : 1;
+ (i >= num_dims - lhs_shape.rank()) ? lhs_shape.dim(i - (num_dims - lhs_shape.rank())) : 1;
const std::int32_t rhs_dim =
- (i >= num_dims - rhs_shape.rank()) ? rhs_shape.dim(i - (num_dims - rhs_shape.rank())) : 1;
+ (i >= num_dims - rhs_shape.rank()) ? rhs_shape.dim(i - (num_dims - rhs_shape.rank())) : 1;
if (lhs_dim == 1)
{
result_shape.dim(i) = rhs_dim;
}
TensorVariant::TensorVariant(DataType element_type, const Shape &shape)
- : TensorVariant(TensorType(element_type, shape))
+ : TensorVariant(TensorType(element_type, shape))
{
}
}
TensorVariant::TensorVariant(DataType element_type, const Shape &shape, const void *data)
- : TensorVariant(TensorType(element_type, shape), data)
+ : TensorVariant(TensorType(element_type, shape), data)
{
}
* @param shape shape to broadcast to
*/
TensorVariant::TensorVariant(const TensorVariant &t_old, const Shape &shape)
- : _type(t_old.getType().getElementType(), shape), _data(t_old._data),
- _strides(static_cast<size_t>(shape.rank())), _element_size(t_old._element_size)
+ : _type(t_old.getType().getElementType(), shape), _data(t_old._data),
+ _strides(static_cast<size_t>(shape.rank())), _element_size(t_old._element_size)
{
int axis_old = t_old.getShape().rank() - 1;
for (int d = shape.rank() - 1; d >= 0; d--)
Caffe2Importer::Caffe2Importer(std::string predict_net, std::string init_net,
const std::vector<std::vector<int>> &input_shapes)
- : _predictNet(std::move(predict_net)), _initNet(std::move(init_net))
+ : _predictNet(std::move(predict_net)), _initNet(std::move(init_net))
{
for (auto &shape : input_shapes)
_inputShapes.emplace_back(shape);
}
const std::map<std::string, SupportedCaffe2OpType> Caffe2Importer::_operatorTypes = {
- {"Add", SupportedCaffe2OpType::add},
- {"AveragePool", SupportedCaffe2OpType::averagePool},
- {"Conv", SupportedCaffe2OpType::conv},
- {"Concat", SupportedCaffe2OpType::concat},
- {"ConstantFill", SupportedCaffe2OpType::constantFill},
- {"Dropout", SupportedCaffe2OpType::dropout},
- {"FC", SupportedCaffe2OpType::FC},
- {"GivenTensorFill", SupportedCaffe2OpType::givenTensorFill},
- {"MaxPool", SupportedCaffe2OpType::maxPool},
- {"Mul", SupportedCaffe2OpType::mul},
- {"Relu", SupportedCaffe2OpType::relu},
- {"ResizeNearest", SupportedCaffe2OpType::resizeNearest},
- {"Sigmoid", SupportedCaffe2OpType::sigmoid},
- {"Softmax", SupportedCaffe2OpType::softmax},
- {"SpatialBN", SupportedCaffe2OpType::spatialBN},
- {"Sum", SupportedCaffe2OpType::sum},
- {"Clip", SupportedCaffe2OpType::clip},
- {"Reshape", SupportedCaffe2OpType::reshape},
- {"GivenTensorInt64Fill", SupportedCaffe2OpType::givenTensorInt64Fill},
+ {"Add", SupportedCaffe2OpType::add},
+ {"AveragePool", SupportedCaffe2OpType::averagePool},
+ {"Conv", SupportedCaffe2OpType::conv},
+ {"Concat", SupportedCaffe2OpType::concat},
+ {"ConstantFill", SupportedCaffe2OpType::constantFill},
+ {"Dropout", SupportedCaffe2OpType::dropout},
+ {"FC", SupportedCaffe2OpType::FC},
+ {"GivenTensorFill", SupportedCaffe2OpType::givenTensorFill},
+ {"MaxPool", SupportedCaffe2OpType::maxPool},
+ {"Mul", SupportedCaffe2OpType::mul},
+ {"Relu", SupportedCaffe2OpType::relu},
+ {"ResizeNearest", SupportedCaffe2OpType::resizeNearest},
+ {"Sigmoid", SupportedCaffe2OpType::sigmoid},
+ {"Softmax", SupportedCaffe2OpType::softmax},
+ {"SpatialBN", SupportedCaffe2OpType::spatialBN},
+ {"Sum", SupportedCaffe2OpType::sum},
+ {"Clip", SupportedCaffe2OpType::clip},
+ {"Reshape", SupportedCaffe2OpType::reshape},
+ {"GivenTensorInt64Fill", SupportedCaffe2OpType::givenTensorInt64Fill},
};
-}
+} // namespace
namespace mir_caffe2
{
{
int is_global_pooling = getSingleArgument(op, "global_pooling", 0);
bool has_custom_kernel_size =
- hasArgument(op.arg(), "kernel_h") || hasArgument(op.arg(), "kernel_w");
+ hasArgument(op.arg(), "kernel_h") || hasArgument(op.arg(), "kernel_w");
bool has_custom_kernels_size = hasArgument(op.arg(), "kernels");
int kernel_h(0), kernel_w(0);
if (has_custom_pad && hasArgument(op.arg(), "pad"))
throw std::runtime_error("Custom pad can't be combined with overall pad");
- if (has_custom_pad &&
- !(hasArgument(op.arg(), "pad_l") && hasArgument(op.arg(), "pad_r") &&
- hasArgument(op.arg(), "pad_t") && hasArgument(op.arg(), "pad_b")))
+ if (has_custom_pad && !(hasArgument(op.arg(), "pad_l") && hasArgument(op.arg(), "pad_r") &&
+ hasArgument(op.arg(), "pad_t") && hasArgument(op.arg(), "pad_b")))
throw std::runtime_error("If one custom pad specified - all custom pads must be specified");
// Kernel size
bool has_custom_kernel_size =
- hasArgument(op.arg(), "kernel_h") || hasArgument(op.arg(), "kernel_w");
+ hasArgument(op.arg(), "kernel_h") || hasArgument(op.arg(), "kernel_w");
if (has_custom_kernel_size && hasArgument(op.arg(), "kernel"))
throw std::runtime_error("Custom kernel size can't be combined with overall kernel size");
if (has_custom_kernel_size &&
!(hasArgument(op.arg(), "kernel_h") && hasArgument(op.arg(), "kernel_w")))
throw std::runtime_error(
- "If one custom kernel size specified - all custom kernel sizes must be specified");
+ "If one custom kernel size specified - all custom kernel sizes must be specified");
}
static mir::TensorVariant createTensor(const OperatorDef &op)
auto reshape = createOp<ops::ReshapeOp>(inputs[0], shape)->getOutput(0);
auto weights =
- createOp<ops::TransposeOp>(inputs[1], std::vector<std::size_t>{1, 0})->getOutput(0);
+ createOp<ops::TransposeOp>(inputs[1], std::vector<std::size_t>{1, 0})->getOutput(0);
auto result = createOp<ops::FullyConnectedOp>(reshape, weights)->getOutput(0);
result = createOp<ops::AddOp>(result, inputs[2])->getOutput(0);
scales[2] = getSingleArgument(op, "height_scale", 1.0f);
scales[3] = getSingleArgument(op, "width_scale", 1.0f);
auto result =
- createOp<ops::ResizeOp>(inputs[0], ops::ResizeOp::ResizeMethod::nearestNeighbor, scales)
- ->getOutput(0);
+ createOp<ops::ResizeOp>(inputs[0], ops::ResizeOp::ResizeMethod::nearestNeighbor, scales)
+ ->getOutput(0);
return {result};
}
// Sanity checks
if (op.input_size() != 5)
throw std::runtime_error(
- "SpatialBN must have exactly 5 inputs ('sums' and 'sumsq' are not supported yet)");
+ "SpatialBN must have exactly 5 inputs ('sums' and 'sumsq' are not supported yet)");
if (getSingleArgument(op, "is_test", 1) != 1)
throw std::runtime_error("SpatialBN: only test mode supported");
auto var_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[4]->getNode());
if (scale_op == nullptr || bias_op == nullptr || mean_op == nullptr || var_op == nullptr)
throw std::runtime_error(
- "SpatialBN: non-constant 'scale', 'bias', 'mean' and 'var' inputs are not supported yet.");
+ "SpatialBN: non-constant 'scale', 'bias', 'mean' and 'var' inputs are not supported yet.");
const auto &scale_tensor = scale_op->getValue();
const auto &bias_tensor = bias_op->getValue();
}
const std::map<std::string, CaffeOpType> CaffeImporter::_operatorTypes = {
- {"AbsVal", CaffeOpType::absVal},
- {"Accuracy", CaffeOpType::accuracy},
- {"ArgMax", CaffeOpType::argMax},
- {"BatchNorm", CaffeOpType::batchNorm},
- {"BatchReindex", CaffeOpType::batchReindex},
- {"Bias", CaffeOpType::bias},
- {"BNLL", CaffeOpType::BNLL},
- {"Clip", CaffeOpType::clip},
- {"Concat", CaffeOpType::concat},
- {"ContrastiveLoss", CaffeOpType::contrastiveLoss},
- {"Convolution", CaffeOpType::convolution},
- {"Crop", CaffeOpType::crop},
- {"Data", CaffeOpType::data},
- {"Deconvolution", CaffeOpType::deconvolution},
- {"Dropout", CaffeOpType::dropout},
- {"DummyData", CaffeOpType::dummyData},
- {"Eltwise", CaffeOpType::eltwise},
- {"ELU", CaffeOpType::ELU},
- {"Embed", CaffeOpType::embed},
- {"EuclidianLoss", CaffeOpType::euclidianLoss},
- {"Exp", CaffeOpType::exp},
- {"Filter", CaffeOpType::filter},
- {"Flatten", CaffeOpType::flatten},
- {"HDF5Data", CaffeOpType::HDF5Data},
- {"HDF5Output", CaffeOpType::HDF5Output},
- {"HingeLoss", CaffeOpType::hingeLoss},
- {"Im2Col", CaffeOpType::im2Col},
- {"ImageData", CaffeOpType::imageData},
- {"InfogainLoss", CaffeOpType::infogainLoss},
- {"InnerProduct", CaffeOpType::innerProduct},
- {"Input", CaffeOpType::input},
- {"Log", CaffeOpType::log},
- {"LRN", CaffeOpType::LRN},
- {"LSTM", CaffeOpType::LSTM},
- {"MemoryData", CaffeOpType::memoryData},
- {"MultinomialLogisticLoss", CaffeOpType::multinomialLogisticLoss},
- {"MVN", CaffeOpType::MVN},
- {"Parameter", CaffeOpType::parameter},
- {"Pooling", CaffeOpType::pooling},
- {"Power", CaffeOpType::power},
- {"PReLU", CaffeOpType::PReLU},
- {"Python", CaffeOpType::python},
- {"Recurrent", CaffeOpType::recurrent},
- {"Reduction", CaffeOpType::reduction},
- {"ReLU", CaffeOpType::ReLU},
- {"Reshape", CaffeOpType::reshape},
- {"RNN", CaffeOpType::RNN},
- {"Scale", CaffeOpType::scale},
- {"SigmoidCrossEntropyLoss", CaffeOpType::sigmoidCrossEntropyLoss},
- {"Sigmoid", CaffeOpType::sigmoid},
- {"Silence", CaffeOpType::silence},
- {"Softmax", CaffeOpType::softmax},
- {"SoftmaxWithLoss", CaffeOpType::softmaxWithLoss},
- {"SPP", CaffeOpType::SPP},
- {"Split", CaffeOpType::split},
- {"Slice", CaffeOpType::slice},
- {"TanH", CaffeOpType::tanh},
- {"Threshold", CaffeOpType::threshold},
- {"Tile", CaffeOpType::tile},
- {"WindowData", CaffeOpType::windowData}};
+ {"AbsVal", CaffeOpType::absVal},
+ {"Accuracy", CaffeOpType::accuracy},
+ {"ArgMax", CaffeOpType::argMax},
+ {"BatchNorm", CaffeOpType::batchNorm},
+ {"BatchReindex", CaffeOpType::batchReindex},
+ {"Bias", CaffeOpType::bias},
+ {"BNLL", CaffeOpType::BNLL},
+ {"Clip", CaffeOpType::clip},
+ {"Concat", CaffeOpType::concat},
+ {"ContrastiveLoss", CaffeOpType::contrastiveLoss},
+ {"Convolution", CaffeOpType::convolution},
+ {"Crop", CaffeOpType::crop},
+ {"Data", CaffeOpType::data},
+ {"Deconvolution", CaffeOpType::deconvolution},
+ {"Dropout", CaffeOpType::dropout},
+ {"DummyData", CaffeOpType::dummyData},
+ {"Eltwise", CaffeOpType::eltwise},
+ {"ELU", CaffeOpType::ELU},
+ {"Embed", CaffeOpType::embed},
+ {"EuclidianLoss", CaffeOpType::euclidianLoss},
+ {"Exp", CaffeOpType::exp},
+ {"Filter", CaffeOpType::filter},
+ {"Flatten", CaffeOpType::flatten},
+ {"HDF5Data", CaffeOpType::HDF5Data},
+ {"HDF5Output", CaffeOpType::HDF5Output},
+ {"HingeLoss", CaffeOpType::hingeLoss},
+ {"Im2Col", CaffeOpType::im2Col},
+ {"ImageData", CaffeOpType::imageData},
+ {"InfogainLoss", CaffeOpType::infogainLoss},
+ {"InnerProduct", CaffeOpType::innerProduct},
+ {"Input", CaffeOpType::input},
+ {"Log", CaffeOpType::log},
+ {"LRN", CaffeOpType::LRN},
+ {"LSTM", CaffeOpType::LSTM},
+ {"MemoryData", CaffeOpType::memoryData},
+ {"MultinomialLogisticLoss", CaffeOpType::multinomialLogisticLoss},
+ {"MVN", CaffeOpType::MVN},
+ {"Parameter", CaffeOpType::parameter},
+ {"Pooling", CaffeOpType::pooling},
+ {"Power", CaffeOpType::power},
+ {"PReLU", CaffeOpType::PReLU},
+ {"Python", CaffeOpType::python},
+ {"Recurrent", CaffeOpType::recurrent},
+ {"Reduction", CaffeOpType::reduction},
+ {"ReLU", CaffeOpType::ReLU},
+ {"Reshape", CaffeOpType::reshape},
+ {"RNN", CaffeOpType::RNN},
+ {"Scale", CaffeOpType::scale},
+ {"SigmoidCrossEntropyLoss", CaffeOpType::sigmoidCrossEntropyLoss},
+ {"Sigmoid", CaffeOpType::sigmoid},
+ {"Silence", CaffeOpType::silence},
+ {"Softmax", CaffeOpType::softmax},
+ {"SoftmaxWithLoss", CaffeOpType::softmaxWithLoss},
+ {"SPP", CaffeOpType::SPP},
+ {"Split", CaffeOpType::split},
+ {"Slice", CaffeOpType::slice},
+ {"TanH", CaffeOpType::tanh},
+ {"Threshold", CaffeOpType::threshold},
+ {"Tile", CaffeOpType::tile},
+ {"WindowData", CaffeOpType::windowData}};
} // namespace
std::unique_ptr<mir::Graph> importModelFromBinaryFile(const std::string &filename)
{
// Assuming NCHW format.
const std::int32_t padded_input =
- input_shape.dim(2 + i) + attributes.padding_before[i] + attributes.padding_after[i];
+ input_shape.dim(2 + i) + attributes.padding_before[i] + attributes.padding_after[i];
if ((padded_input - attributes.window[i]) % attributes.strides[i] != 0)
++attributes.padding_after[i];
}
auto input = createOp<ops::TransposeOp>(inputs[0], std::vector<std::size_t>{0, 2, 3, 1});
auto softmax = createOp<ops::SoftmaxOp>(input->getOutput(0), axis);
auto result =
- createOp<ops::TransposeOp>(softmax->getOutput(0), std::vector<std::size_t>{0, 3, 1, 2});
+ createOp<ops::TransposeOp>(softmax->getOutput(0), std::vector<std::size_t>{0, 3, 1, 2});
return {result->getOutput(0)};
}
c_t = createOp<ops::AddOp>(createOp<ops::MulOp>(c_cont_t, f_t)->getOutput(0),
createOp<ops::MulOp>(i_t, g_t)->getOutput(0))
- ->getOutput(0);
+ ->getOutput(0);
h_t = createOp<ops::MulOp>(createOp<ops::TanhOp>(c_t)->getOutput(0), o_t)->getOutput(0);
h_slices[t] = h_t;
{
const auto &attributes = node.attribute();
const auto it = std::find_if(
- attributes.cbegin(), attributes.cend(),
- [&name](const onnx::AttributeProto &attribute) { return attribute.name() == name; });
+ attributes.cbegin(), attributes.cend(),
+ [&name](const onnx::AttributeProto &attribute) { return attribute.name() == name; });
if (it == attributes.cend())
return nullptr;
return &*it;
// Assuming input has NCHW format.
const std::int32_t residual = input_shape.dim(2 + i) % strides[i];
const std::int32_t total_pad = std::max(
- INT32_C(0), residual == 0 ? eff_window_size - strides[i] : eff_window_size - residual);
+ INT32_C(0), residual == 0 ? eff_window_size - strides[i] : eff_window_size - residual);
if (pad_type == "SAME_UPPER")
{
padding_before[i] = total_pad / 2;
}
bool is_foldable =
- std::all_of(op->getInputs().begin(), op->getInputs().end(), [](mir::Operation::Output *out) {
- return out->getNode()->getType() == mir::Operation::Type::constant;
- });
+ std::all_of(op->getInputs().begin(), op->getInputs().end(), [](mir::Operation::Output *out) {
+ return out->getNode()->getType() == mir::Operation::Type::constant;
+ });
if (!is_foldable)
return op;
auto opset = _modelCtx->getDomainOpsetVersion(onnx_node.domain());
NodeConverterRegistry::ConverterFunc converter =
- NodeConverterRegistry::getInstance().lookup(op_type, opset);
+ NodeConverterRegistry::getInstance().lookup(op_type, opset);
if (converter == nullptr)
problems_op_set.emplace(op_type, opset);
}
auto elem_type = onnxDataTypeToMirDataType(
- (onnx::TensorProto_DataType)input.type().tensor_type().elem_type());
+ (onnx::TensorProto_DataType)input.type().tensor_type().elem_type());
mir::TensorType type{elem_type, shape};
auto *op = _graph->create<mir::ops::InputOp>(type);
_converterCtx->setOutput(input.name(), op->getOutput(0));
auto opset = _modelCtx->getDomainOpsetVersion(onnx_node.domain());
// Get converter
NodeConverterRegistry::ConverterFunc converter =
- NodeConverterRegistry::getInstance().lookup(op_type, opset);
+ NodeConverterRegistry::getInstance().lookup(op_type, opset);
assert(converter != nullptr);
converter(onnx_node, _converterCtx.get());
}
const VersionMap &conv_map = it->second;
auto res = std::lower_bound(
- conv_map.crbegin(), conv_map.crend(), opset,
- [](const VersionMap::value_type &pair, int64_t opset) { return pair.first > opset; });
+ conv_map.crbegin(), conv_map.crend(), opset,
+ [](const VersionMap::value_type &pair, int64_t opset) { return pair.first > opset; });
if (res == conv_map.crend())
{
constexpr int num_spatial_dims = 2;
const auto strides =
- getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
+ getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
if (strides.size() != num_spatial_dims)
throw std::runtime_error("AveragePool: attribute 'strides' has incorrect size.");
if (scale_op == nullptr || mean_op == nullptr || var_op == nullptr)
throw std::runtime_error(
- "BatchNormalization: only constant 'scale', 'mean' and 'variance' inputs are supported.");
+ "BatchNormalization: only constant 'scale', 'mean' and 'variance' inputs are supported.");
mir::Tensor<float> scale_accessor(scale_op->getValue());
mir::Tensor<float> mean_accessor(mean_op->getValue());
{
auto bias = inputs[2];
bias = createOp<mir::ops::ReshapeOp>(graph, bias, mir::Shape{1, bias->getShape().dim(0), 1, 1})
- ->getOutput(0);
+ ->getOutput(0);
result = createOp<mir::ops::AddOp>(graph, result, bias)->getOutput(0);
}
constexpr int num_spatial_dims = 2;
const auto dilations =
- getAttributeValue(onnx_node, "dilations", std::vector<std::int32_t>(num_spatial_dims, 1));
+ getAttributeValue(onnx_node, "dilations", std::vector<std::int32_t>(num_spatial_dims, 1));
if (dilations.size() != num_spatial_dims)
throw std::runtime_error("ConvTranspose: attribute 'dilations' has incorrect size.");
if (!std::all_of(dilations.cbegin(), dilations.cend(), [](std::int32_t x) { return x == 1; }))
throw std::runtime_error("ConvTranspose: attribute 'dilations' has unsupported value.");
const auto strides =
- getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
+ getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
if (strides.size() != num_spatial_dims)
throw std::runtime_error("ConvTranspose: attribute 'strides' has incorrect size.");
- const auto output_padding = getAttributeValue(onnx_node, "output_padding",
- std::vector<std::int32_t>(num_spatial_dims, 0));
+ const auto output_padding =
+ getAttributeValue(onnx_node, "output_padding", std::vector<std::int32_t>(num_spatial_dims, 0));
if (output_padding.size() != num_spatial_dims)
throw std::runtime_error("ConvTranspose: attribute 'output_padding' has incorrect size.");
if (!std::all_of(output_padding.cbegin(), output_padding.cend(),
// Assuming kernel has IOHW format.
assert(kernel->getShape().rank() == 4);
const auto kernel_size = getAttributeValue(
- onnx_node, "kernel_shape",
- std::vector<std::int32_t>{kernel->getShape().dim(2), kernel->getShape().dim(3)});
+ onnx_node, "kernel_shape",
+ std::vector<std::int32_t>{kernel->getShape().dim(2), kernel->getShape().dim(3)});
if (kernel_size.size() != num_spatial_dims)
throw std::runtime_error("ConvTranspose: attribute 'kernel_shape' has incorrect size.");
attributes.strides = strides;
attributes.data_format = mir::DataFormat::NCHW;
attributes.padding_type = mir::ops::PaddingType::SameUpper;
- result = createOp<mir::ops::DeConv2DOp>(graph, input, kernel, attributes, output_shape)
- ->getOutput(0);
+ result =
+ createOp<mir::ops::DeConv2DOp>(graph, input, kernel, attributes, output_shape)->getOutput(0);
}
else
{
// TODO This code was not tested.
throw std::runtime_error(
- "ConvTranspose: absence of attribute 'output_shape' is not supported.");
+ "ConvTranspose: absence of attribute 'output_shape' is not supported.");
std::vector<std::int32_t> padding_before(num_spatial_dims, 0);
std::vector<std::int32_t> padding_after(num_spatial_dims, 0);
if (const auto *pads_attr = findAttribute(onnx_node, "pads"))
{
auto bias = inputs[2];
bias = createOp<mir::ops::ReshapeOp>(graph, bias, mir::Shape{1, bias->getShape().dim(0), 1, 1})
- ->getOutput(0);
+ ->getOutput(0);
result = createOp<mir::ops::AddOp>(graph, result, bias)->getOutput(0);
}
constexpr int num_spatial_dims = 2;
const auto strides =
- getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
+ getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
if (strides.size() != num_spatial_dims)
throw std::runtime_error("MaxPool: attribute 'strides' has incorrect size.");
mir::Graph *graph = context->getGraph();
auto result =
- createOp<mir::ops::ReduceMeanOp>(graph, inputs[0], reduce_dims, keep_dims)->getOutput(0);
+ createOp<mir::ops::ReduceMeanOp>(graph, inputs[0], reduce_dims, keep_dims)->getOutput(0);
context->setNodeOutputs(onnx_node, {result});
}
scales_vector.at(3) = w_scale;
auto result =
- createOp<mir::ops::ResizeOp>(graph, inputs[0],
- mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
- ->getOutput(0);
+ createOp<mir::ops::ResizeOp>(graph, inputs[0],
+ mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
+ ->getOutput(0);
context->setNodeOutputs(onnx_node, {result});
}
if (scales_attr->floats_size() != inputs[0]->getShape().rank())
throw std::runtime_error(
- "Number of elements of scales should be the same as the rank of input");
+ "Number of elements of scales should be the same as the rank of input");
assert(inputs[0]->getShape().rank() == 4 && "Only rank 4 is supported");
std::vector<float> scales_vector(4);
scales_vector.at(3) = scales_attr->floats(3);
auto result =
- createOp<mir::ops::ResizeOp>(graph, inputs[0],
- mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
- ->getOutput(0);
+ createOp<mir::ops::ResizeOp>(graph, inputs[0],
+ mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
+ ->getOutput(0);
context->setNodeOutputs(onnx_node, {result});
}
scales_vector[i] = scales_tensor.atOffset(i);
auto result =
- createOp<mir::ops::ResizeOp>(graph, inputs[0],
- mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
- ->getOutput(0);
+ createOp<mir::ops::ResizeOp>(graph, inputs[0],
+ mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
+ ->getOutput(0);
context->setNodeOutputs(onnx_node, {result});
}
}
static const std::set<tflite::BuiltinOperator> supportedOperators = {
- tflite::BuiltinOperator_ADD,
- tflite::BuiltinOperator_AVERAGE_POOL_2D,
- tflite::BuiltinOperator_CONCATENATION,
- tflite::BuiltinOperator_CONV_2D,
- tflite::BuiltinOperator_DEPTHWISE_CONV_2D,
- tflite::BuiltinOperator_DIV,
- tflite::BuiltinOperator_FULLY_CONNECTED,
- tflite::BuiltinOperator_HARD_SWISH,
- tflite::BuiltinOperator_LEAKY_RELU,
- tflite::BuiltinOperator_LOGISTIC,
- tflite::BuiltinOperator_MAX_POOL_2D,
- tflite::BuiltinOperator_MAXIMUM,
- tflite::BuiltinOperator_MEAN,
- tflite::BuiltinOperator_MUL,
- tflite::BuiltinOperator_PAD,
- tflite::BuiltinOperator_RELU,
- tflite::BuiltinOperator_RELU6,
- tflite::BuiltinOperator_RESHAPE,
- tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
- tflite::BuiltinOperator_RSQRT,
- tflite::BuiltinOperator_SHAPE,
- tflite::BuiltinOperator_SLICE,
- tflite::BuiltinOperator_SOFTMAX,
- tflite::BuiltinOperator_SQRT,
- tflite::BuiltinOperator_SQUARED_DIFFERENCE,
- tflite::BuiltinOperator_SQUEEZE,
- tflite::BuiltinOperator_STRIDED_SLICE,
- tflite::BuiltinOperator_SUB,
- tflite::BuiltinOperator_TANH,
- tflite::BuiltinOperator_TRANSPOSE,
- tflite::BuiltinOperator_TRANSPOSE_CONV,
+ tflite::BuiltinOperator_ADD,
+ tflite::BuiltinOperator_AVERAGE_POOL_2D,
+ tflite::BuiltinOperator_CONCATENATION,
+ tflite::BuiltinOperator_CONV_2D,
+ tflite::BuiltinOperator_DEPTHWISE_CONV_2D,
+ tflite::BuiltinOperator_DIV,
+ tflite::BuiltinOperator_FULLY_CONNECTED,
+ tflite::BuiltinOperator_HARD_SWISH,
+ tflite::BuiltinOperator_LEAKY_RELU,
+ tflite::BuiltinOperator_LOGISTIC,
+ tflite::BuiltinOperator_MAX_POOL_2D,
+ tflite::BuiltinOperator_MAXIMUM,
+ tflite::BuiltinOperator_MEAN,
+ tflite::BuiltinOperator_MUL,
+ tflite::BuiltinOperator_PAD,
+ tflite::BuiltinOperator_RELU,
+ tflite::BuiltinOperator_RELU6,
+ tflite::BuiltinOperator_RESHAPE,
+ tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
+ tflite::BuiltinOperator_RSQRT,
+ tflite::BuiltinOperator_SHAPE,
+ tflite::BuiltinOperator_SLICE,
+ tflite::BuiltinOperator_SOFTMAX,
+ tflite::BuiltinOperator_SQRT,
+ tflite::BuiltinOperator_SQUARED_DIFFERENCE,
+ tflite::BuiltinOperator_SQUEEZE,
+ tflite::BuiltinOperator_STRIDED_SLICE,
+ tflite::BuiltinOperator_SUB,
+ tflite::BuiltinOperator_TANH,
+ tflite::BuiltinOperator_TRANSPOSE,
+ tflite::BuiltinOperator_TRANSPOSE_CONV,
};
void TfliteImporter::collectUnsupportedOps()
outputs = _opCreator->convertConv2D(op->builtin_options.AsConv2DOptions(), inputs);
break;
case tflite::BuiltinOperator_DEPTHWISE_CONV_2D:
- outputs = _opCreator->convertDepthwiseConv2D(op->builtin_options.AsDepthwiseConv2DOptions(),
- inputs);
+ outputs =
+ _opCreator->convertDepthwiseConv2D(op->builtin_options.AsDepthwiseConv2DOptions(), inputs);
break;
case tflite::BuiltinOperator_MAX_POOL_2D:
outputs = _opCreator->convertMaxPool2D(op->builtin_options.AsPool2DOptions(), inputs);
break;
case tflite::BuiltinOperator_CONCATENATION:
outputs =
- _opCreator->convertConcatenation(op->builtin_options.AsConcatenationOptions(), inputs);
+ _opCreator->convertConcatenation(op->builtin_options.AsConcatenationOptions(), inputs);
break;
case tflite::BuiltinOperator_RESHAPE:
outputs = _opCreator->convertReshape(op->builtin_options.AsReshapeOptions(), inputs);
break;
case tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR:
outputs = _opCreator->convertResizeNearestNeighbor(
- op->builtin_options.AsResizeNearestNeighborOptions(), inputs);
+ op->builtin_options.AsResizeNearestNeighborOptions(), inputs);
break;
case tflite::BuiltinOperator_MEAN:
outputs = _opCreator->convertMean(op->builtin_options.AsReducerOptions(), inputs);
break;
case tflite::BuiltinOperator_FULLY_CONNECTED:
outputs =
- _opCreator->convertFullyConnected(op->builtin_options.AsFullyConnectedOptions(), inputs);
+ _opCreator->convertFullyConnected(op->builtin_options.AsFullyConnectedOptions(), inputs);
break;
case tflite::BuiltinOperator_SOFTMAX:
outputs = _opCreator->convertSoftmax(op->builtin_options.AsSoftmaxOptions(), inputs);
break;
case tflite::BuiltinOperator_TRANSPOSE_CONV:
outputs =
- _opCreator->convertTransposeConv(op->builtin_options.AsTransposeConvOptions(), inputs);
+ _opCreator->convertTransposeConv(op->builtin_options.AsTransposeConvOptions(), inputs);
break;
case tflite::BuiltinOperator_PAD:
outputs = _opCreator->convertPad(op->builtin_options.AsPadOptions(), inputs);
break;
case tflite::BuiltinOperator_STRIDED_SLICE:
outputs =
- _opCreator->convertStridedSlice(op->builtin_options.AsStridedSliceOptions(), inputs);
+ _opCreator->convertStridedSlice(op->builtin_options.AsStridedSliceOptions(), inputs);
break;
case tflite::BuiltinOperator_LEAKY_RELU:
outputs = _opCreator->convertLeakyReLU(op->builtin_options.AsLeakyReluOptions(), inputs);
{
// Assuming NHWC format.
const std::int32_t total_padding =
- (input_shape.dim(1 + i) % strides[i] == 0)
- ? std::max(0, window_size[i] - strides[i])
- : std::max(0, window_size[i] - input_shape.dim(1 + i) % strides[i]);
+ (input_shape.dim(1 + i) % strides[i] == 0)
+ ? std::max(0, window_size[i] - strides[i])
+ : std::max(0, window_size[i] - input_shape.dim(1 + i) % strides[i]);
padding_before[i] = total_padding / 2;
padding_after[i] = total_padding - padding_before[i];
}
Shape res_shape{input_shape.dim(0), size_tensor.at(mir::Index{0}), size_tensor.at(mir::Index{1}),
input_shape.dim(3)};
auto result =
- createOp<ops::ResizeOp>(input, ops::ResizeOp::ResizeMethod::nearestNeighbor, res_shape);
+ createOp<ops::ResizeOp>(input, ops::ResizeOp::ResizeMethod::nearestNeighbor, res_shape);
return {result->getOutput(0)};
}
// (in_size - window_size + 1 + stride - 1) / stride =
// (in_size - window_size) / stride + 1
output_shape.dim(spatial_dim_index) =
- (padded_input - _attributes.window[i]) / _attributes.strides[i] + 1;
+ (padded_input - _attributes.window[i]) / _attributes.strides[i] + 1;
}
setOutputType(0, {getInput(0)->getElementType(), output_shape});
// (in_size - kernel_size + 1 + stride - 1) / stride =
// (in_size - kernel_size) / stride + 1
output_shape.dim(spatial_dim_index) =
- (padded_input - kernel_shape.dim(1 + i)) / _attributes.strides[i] + 1;
+ (padded_input - kernel_shape.dim(1 + i)) / _attributes.strides[i] + 1;
}
auto dt = getInput(0)->getElementType();
{
const int spatial_dim_index = getDataSpatialDimIndex(_attributes.data_format, i);
const std::int32_t total_padding =
- (input_shape.dim(spatial_dim_index) - 1) * _attributes.strides[i] + kernel_shape.dim(i) -
- output_shape.dim(spatial_dim_index);
+ (input_shape.dim(spatial_dim_index) - 1) * _attributes.strides[i] + kernel_shape.dim(i) -
+ output_shape.dim(spatial_dim_index);
switch (_attributes.padding_type)
{
{
const int spatial_dim_index = getDataSpatialDimIndex(_attributes.data_format, i);
output_shape.dim(spatial_dim_index) =
- (input_shape.dim(spatial_dim_index) - 1) * _attributes.strides[i] + kernel_shape.dim(i) -
- (_attributes.padding_before.at(i) + _attributes.padding_after.at(i));
+ (input_shape.dim(spatial_dim_index) - 1) * _attributes.strides[i] + kernel_shape.dim(i) -
+ (_attributes.padding_before.at(i) + _attributes.padding_after.at(i));
}
setOutputType(0, {getInput(0)->getElementType(), output_shape});
// (in_size - kernel_size + 1 + stride - 1) / stride =
// (in_size - kernel_size) / stride + 1
output_shape.dim(spatial_dim_index) =
- (padded_input - kernel_shape.dim(i)) / _attributes.strides[i] + 1;
+ (padded_input - kernel_shape.dim(i)) / _attributes.strides[i] + 1;
}
setOutputType(0, {getInput(0)->getElementType(), output_shape});
// (in_size - window_size + 1 + stride - 1) / stride =
// (in_size - window_size) / stride + 1
output_shape.dim(spatial_dim_index) =
- (padded_input - _attributes.window[i]) / _attributes.strides[i] + 1;
+ (padded_input - _attributes.window[i]) / _attributes.strides[i] + 1;
}
setOutputType(0, {getInput(0)->getElementType(), output_shape});
for (int32_t dim = 0; dim < num_dims; ++dim)
{
out_shape.dim(dim) =
- _attributes.padding_before[dim] + input_shape.dim(dim) + _attributes.padding_after[dim];
+ _attributes.padding_before[dim] + input_shape.dim(dim) + _attributes.padding_after[dim];
}
setOutputType(0, {getInput(0)->getElementType(), out_shape});
{
TransposeOp::TransposeOp(Output *arg, const std::vector<std::size_t> &axis_order)
- : Operation(Type::transpose, {arg}), _axis_order(axis_order)
+ : Operation(Type::transpose, {arg}), _axis_order(axis_order)
{
assert(_axis_order.size() == static_cast<std::size_t>(getInputShape(0).rank()));
inferOutputTypes();
Shape output_shape(input_shape.rank());
for (std::size_t i = 0; i < _axis_order.size(); ++i)
output_shape.dim(static_cast<std::int64_t>(i)) =
- input_shape.dim(static_cast<int32_t>(_axis_order.at(i)));
+ input_shape.dim(static_cast<int32_t>(_axis_order.at(i)));
setOutputType(0, {getInput(0)->getElementType(), output_shape});
}
auto input = g.create<ops::InputOp>(input_type);
auto op =
- g.create<ops::ResizeOp>(input->getOutput(0), ops::ResizeOp::ResizeMethod::nearestNeighbor,
- std::vector<float>{1, 6, 2, 1});
+ g.create<ops::ResizeOp>(input->getOutput(0), ops::ResizeOp::ResizeMethod::nearestNeighbor,
+ std::vector<float>{1, 6, 2, 1});
ASSERT_EQ(result_shape, op->getOutputShape(0));
}
template <typename... Args>
explicit ParamType(int32_t actual_len, Args &&... args)
- : actual_length(actual_len), shape({static_cast<int32_t>(args)...})
+ : actual_length(actual_len), shape({static_cast<int32_t>(args)...})
{
}
};
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
loco::FeatureEncode *encode_node =
- dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(1));
+ dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(1));
loco::AvgPool2D *pool_node = dynamic_cast<loco::AvgPool2D *>(loco_graph->nodes()->at(2));
loco::FeatureDecode *decode_node =
- dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(3));
+ dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(3));
loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(4));
ASSERT_NE(pull_node, nullptr);
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
loco::FeatureEncode *encode_node =
- dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(1));
+ dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(1));
loco::MaxPool2D *pool_node = dynamic_cast<loco::MaxPool2D *>(loco_graph->nodes()->at(2));
loco::FeatureDecode *decode_node =
- dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(3));
+ dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(3));
loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(4));
ASSERT_NE(pull_node, nullptr);
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
loco::Reshape<loco::ReshapeType::Fixed> *reshape_node =
- dynamic_cast<loco::Reshape<loco::ReshapeType::Fixed> *>(loco_graph->nodes()->at(1));
+ dynamic_cast<loco::Reshape<loco::ReshapeType::Fixed> *>(loco_graph->nodes()->at(1));
loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(2));
ASSERT_NE(pull_node, nullptr);
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(loco_graph->nodes()->at(1));
loco::FeatureEncode *encode_node =
- dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(2));
+ dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(2));
loco::FilterEncode *filter_node = dynamic_cast<loco::FilterEncode *>(loco_graph->nodes()->at(3));
loco::Conv2D *conv_node = dynamic_cast<loco::Conv2D *>(loco_graph->nodes()->at(4));
loco::FeatureDecode *decode_node =
- dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(5));
+ dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(5));
loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(6));
ASSERT_NE(pull_node, nullptr);
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
loco::TensorSoftmax *softmax_node =
- dynamic_cast<loco::TensorSoftmax *>(loco_graph->nodes()->at(1));
+ dynamic_cast<loco::TensorSoftmax *>(loco_graph->nodes()->at(1));
loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(2));
ASSERT_NE(pull_node, nullptr);
attributes.padding_after = {7, 4};
auto *conv =
- mir_graph.create<mir::ops::DepthwiseConv2DOp>(input, filter, attributes)->getOutput(0);
+ mir_graph.create<mir::ops::DepthwiseConv2DOp>(input, filter, attributes)->getOutput(0);
mir_graph.create<mir::ops::OutputOp>(conv);
input->setName("x");
loco::DepthwiseConv2D *dw_conv_node = dynamic_cast<loco::DepthwiseConv2D *>(*encode_uses.begin());
ASSERT_NE(dw_conv_node, nullptr);
loco::DepthwiseFilterEncode *filter_node =
- dynamic_cast<loco::DepthwiseFilterEncode *>(dw_conv_node->ker());
+ dynamic_cast<loco::DepthwiseFilterEncode *>(dw_conv_node->ker());
ASSERT_NE(filter_node, nullptr);
ASSERT_EQ(dw_conv_node->ifm(), encode_node);
// Check params
auto encode_uses = loco::succs(encode_node);
ASSERT_EQ(encode_uses.size(), 1);
loco::TransposedConv2D *tr_conv_node =
- dynamic_cast<loco::TransposedConv2D *>(*encode_uses.begin());
+ dynamic_cast<loco::TransposedConv2D *>(*encode_uses.begin());
ASSERT_NE(tr_conv_node, nullptr);
loco::FilterEncode *filter_node = dynamic_cast<loco::FilterEncode *>(tr_conv_node->ker());
ASSERT_NE(filter_node, nullptr);
mir::TensorType input_type{mir::DataType::FLOAT32, {2, 7, 9, 5}};
auto *input = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
auto *transpose =
- mir_graph.create<mir::ops::TransposeOp>(input, std::vector<std::size_t>{3, 0, 1, 2})
- ->getOutput(0);
+ mir_graph.create<mir::ops::TransposeOp>(input, std::vector<std::size_t>{3, 0, 1, 2})
+ ->getOutput(0);
mir_graph.create<mir::ops::OutputOp>(transpose);
input->setName("x");
transpose->setName("y");
target_include_directories(moco_log PUBLIC include)
target_link_libraries(moco_log PUBLIC hermes)
target_link_libraries(moco_log PRIVATE hermes_std)
-target_link_libraries(moco_log PRIVATE stdex)
install(TARGETS moco_log DESTINATION lib)
#include "moco/Log.h"
#include <hermes/ConsoleReporter.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace moco
{
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<LoggerConfig>());
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<LoggerConfig>());
}
return ctx;
target_link_libraries(moco_tf_frontend PRIVATE bino)
target_link_libraries(moco_tf_frontend PRIVATE fipe)
target_link_libraries(moco_tf_frontend PRIVATE locop)
-target_link_libraries(moco_tf_frontend PRIVATE stdex)
target_link_libraries(moco_tf_frontend PRIVATE moco_log)
target_link_libraries(moco_tf_frontend PRIVATE pepper_str)
target_link_libraries(moco_tf_frontend PRIVATE pepper_strcast)
target_link_libraries(moco_tf_frontend_test locop)
target_link_libraries(moco_tf_frontend_test moco_log)
target_link_libraries(moco_tf_frontend_test moco_tf_frontend)
-target_link_libraries(moco_tf_frontend_test stdex)
target_link_libraries(moco_tf_frontend_test plier_tf)
target_link_libraries(moco_tf_frontend_test locoex_customop)
target_link_libraries(moco_tf_frontend_test logo)
require("loco")
require("moco")
require("locop")
-require("stdex")
require("moco-log")
require("pepper-strcast")
require("locomotiv")
* This mimics "tf.broadcast_to" API in TensorFlow.
*/
static inline auto broadcast_to(const loco::TensorShape &shape)
- -> decltype(bino::transform_both(std::declval<BroadcastFunctor>()))
+ -> decltype(bino::transform_both(std::declval<BroadcastFunctor>()))
{
return bino::transform_both(BroadcastFunctor{shape});
}
#include <loco/Service/ShapeInference.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
namespace
void set_filter_enc(loco::FilterEncode *filter_enc)
{
- auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto enc = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
// In TensorFlow, Conv2dBackpropInput's filter is a 4-D tensor of following shape:
// [filter_height, filter_width, out_channels, in_channels] or HWOI or HWNC (in/out in loco sense)
// 'tight fit' output. When output size (set by 'input sizes' node input) is
// larger than tight fit, extra spaces filled with zero.
auto tight_output_vertical = tight_output_for_valid_padding(
- input().vertical.value(), stride().vertical(), window().vertical());
+ input().vertical.value(), stride().vertical(), window().vertical());
auto tight_output_horizontal = tight_output_for_valid_padding(
- input().horizontal.value(), stride().horizontal(), window().horizontal());
+ input().horizontal.value(), stride().horizontal(), window().horizontal());
if (output().vertical.value() < tight_output_vertical or
output().horizontal.value() < tight_output_horizontal)
auto whole_pad_vertical = padding_needed(input().vertical.value(), output().vertical.value(),
stride().vertical(), window().vertical());
auto whole_pad_horizontal =
- padding_needed(input().horizontal.value(), output().horizontal.value(),
- stride().horizontal(), window().horizontal());
+ padding_needed(input().horizontal.value(), output().horizontal.value(), stride().horizontal(),
+ window().horizontal());
loco::Padding2D res;
void set_filter_enc(loco::FilterEncode *filter_enc)
{
- auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto enc = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
// In TensorFlow, conv2d filter is a 4-D tensor of following shape:
// [filter_height, filter_width, in_channels, out_channels] -> HWIO (HWCN)
void set_filter_enc(loco::DepthwiseFilterEncode *filter_enc)
{
- auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>();
+ auto enc = std::make_unique<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>();
// In TensorFlow, depthwiseconv2dnative filter is a 4-D tensor of following shape:
// [filter_height, filter_width, in_channels, channel_multiplier] -> HWCM
LOGGER(l);
/**
- * @note This will replace TFDepthwiseConv2dNative node with Canonical FeatureEncode +
- * DepthwiseFilterEncode + DepthwiseConv2D + FeatureDecode
- *
- * Before
- * A -+- TFDepthwiseConv2dNative - C
- * |
- * B -+
- *
- * After
- *
- * A -+ FeatureEncode ----------------+- DepthwiseConv2D - FeatureDecode - C
- * | |
- * +-(TFDepthwiseConv2dNative) |
- * | |
- * B -+ DepthwiseFilterEncode --------+
- *
- * Where
- * A : ifm of TFDepthwiseConv2dNative
- * B : ker of TFDepthwiseConv2dNative
- * C : a node that uses TFDepthwiseConv2dNative as an input
- * TFDepthwiseConv2dNative is disconnected from other nodes
- */
+ * @note This will replace TFDepthwiseConv2dNative node with Canonical FeatureEncode +
+ * DepthwiseFilterEncode + DepthwiseConv2D + FeatureDecode
+ *
+ * Before
+ * A -+- TFDepthwiseConv2dNative - C
+ * |
+ * B -+
+ *
+ * After
+ *
+ * A -+ FeatureEncode ----------------+- DepthwiseConv2D - FeatureDecode - C
+ * | |
+ * +-(TFDepthwiseConv2dNative) |
+ * | |
+ * B -+ DepthwiseFilterEncode --------+
+ *
+ * Where
+ * A : ifm of TFDepthwiseConv2dNative
+ * B : ker of TFDepthwiseConv2dNative
+ * C : a node that uses TFDepthwiseConv2dNative as an input
+ * TFDepthwiseConv2dNative is disconnected from other nodes
+ */
INFO(l) << "TFNodeCanonicalize TFDepthwiseConv2dNative begin";
#include "loco/Service/TypeInference.h"
-#include <stdex/Memory.h>
-
namespace
{
#include <moco/IR/TFDialect.h>
-#include <stdex/Memory.h>
-
namespace
{
#include <moco/IR/TFDialect.h>
-#include <stdex/Memory.h>
-
namespace
{
#include <loco/Service/TypeInference.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
namespace
INFO(l) << "TFNodeCanonicalize TFSoftmax begin";
/**
- * This will replace shape inferred TFSoftmax node into canonical TensorSoftmax
- *
- * Before
- * In ---- TFSoftmax ---- Out(s)
- *
- * After
- * ------ TFSoftmax
- * /
- * In ---- TensorSoftmax ----- Out(s)
- */
+ * This will replace shape inferred TFSoftmax node into canonical TensorSoftmax
+ *
+ * Before
+ * In ---- TFSoftmax ---- Out(s)
+ *
+ * After
+ * ------ TFSoftmax
+ * /
+ * In ---- TensorSoftmax ----- Out(s)
+ */
auto nodeshape = moco::node_shape(node);
// Canonicalization into TensorSoftmax is valid when softmax has shape info
{
/**
-* @brief Canonicalize TF-dialect TFSoftmax into canonical Softmax node
-*/
+ * @brief Canonicalize TF-dialect TFSoftmax into canonical Softmax node
+ */
class SoftmaxCanonicalizer : public SimpleNodeTransform<moco::TFSoftmax>
{
public:
INFO(l) << "TFNodeCanonicalize TFStopGradient begin";
/**
- * This will replace shape inferred TFStopGradient node into canonical Forward
- *
- * Before
- * In --- TFStopGradient --- Out(s)
- *
- * After
- * -- TFStopGradient
- * /
- * In --- Forward --- Out(s)
- */
+ * This will replace shape inferred TFStopGradient node into canonical Forward
+ *
+ * Before
+ * In --- TFStopGradient --- Out(s)
+ *
+ * After
+ * -- TFStopGradient
+ * /
+ * In --- Forward --- Out(s)
+ */
// Create loco node to replace
auto forward_node = graph->nodes()->create<loco::Forward>();
{
/**
-* @brief Canonicalize TF-dialect TFStopGradient into canonical Forward node
-*/
+ * @brief Canonicalize TF-dialect TFStopGradient into canonical Forward node
+ */
class StopGradientCanonicalizer : public SimpleNodeTransform<moco::TFStopGradient>
{
public:
#include <moco/IR/TFDialect.h>
-#include <stdex/Memory.h>
-
namespace
{
#include <moco/IR/TFDialect.h>
-#include <stdex/Memory.h>
-
namespace
{
#include <logo/Phase.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
/* TRANSFORM DECLARATION BEGIN */
// Run shape and type inference at the top
- phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
- phase.emplace_back(stdex::make_unique<TypeInferencePass>());
+ phase.emplace_back(std::make_unique<ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<TypeInferencePass>());
- phase.emplace_back(stdex::make_unique<AddCanonicalizer>());
- phase.emplace_back(stdex::make_unique<AvgPoolCanonicalizer>());
+ phase.emplace_back(std::make_unique<AddCanonicalizer>());
+ phase.emplace_back(std::make_unique<AvgPoolCanonicalizer>());
if (moco::tf::get<moco::tf::Knob::CanonicalizeBiasAdd>())
- phase.emplace_back(stdex::make_unique<BiasAddCanonicalizer>());
- phase.emplace_back(stdex::make_unique<ConcatV2Canonicalizer>());
+ phase.emplace_back(std::make_unique<BiasAddCanonicalizer>());
+ phase.emplace_back(std::make_unique<ConcatV2Canonicalizer>());
if (moco::tf::get<moco::tf::Knob::CanonicalizeConst>())
- phase.emplace_back(stdex::make_unique<ConstCanonicalizer>());
- phase.emplace_back(stdex::make_unique<Conv2DBackpropInputCanonicalizer>());
+ phase.emplace_back(std::make_unique<ConstCanonicalizer>());
+ phase.emplace_back(std::make_unique<Conv2DBackpropInputCanonicalizer>());
if (moco::tf::get<moco::tf::Knob::CanonicalizeConv2D>())
- phase.emplace_back(stdex::make_unique<Conv2DCanonicalizer>());
- phase.emplace_back(stdex::make_unique<DepthwiseConv2dNativeCanonicalizer>());
- phase.emplace_back(stdex::make_unique<IdentityCanonicalizer>());
- phase.emplace_back(stdex::make_unique<MaximumCanonicalizer>());
- phase.emplace_back(stdex::make_unique<MaxPoolCanonicalizer>());
- phase.emplace_back(stdex::make_unique<MeanCanonicalizer>());
- phase.emplace_back(stdex::make_unique<MulCanonicalizer>());
- phase.emplace_back(stdex::make_unique<PadCanonicalizer>());
- phase.emplace_back(stdex::make_unique<PlaceholderCanonicalizer>());
- phase.emplace_back(stdex::make_unique<RealDivCanonicalizer>());
- phase.emplace_back(stdex::make_unique<ReluCanonicalizer>());
- phase.emplace_back(stdex::make_unique<Relu6Canonicalizer>());
- phase.emplace_back(stdex::make_unique<ReshapeCanonicalizer>());
- phase.emplace_back(stdex::make_unique<RsqrtCanonicalizer>());
- phase.emplace_back(stdex::make_unique<SoftmaxCanonicalizer>());
- phase.emplace_back(stdex::make_unique<SqrtCanonicalizer>());
+ phase.emplace_back(std::make_unique<Conv2DCanonicalizer>());
+ phase.emplace_back(std::make_unique<DepthwiseConv2dNativeCanonicalizer>());
+ phase.emplace_back(std::make_unique<IdentityCanonicalizer>());
+ phase.emplace_back(std::make_unique<MaximumCanonicalizer>());
+ phase.emplace_back(std::make_unique<MaxPoolCanonicalizer>());
+ phase.emplace_back(std::make_unique<MeanCanonicalizer>());
+ phase.emplace_back(std::make_unique<MulCanonicalizer>());
+ phase.emplace_back(std::make_unique<PadCanonicalizer>());
+ phase.emplace_back(std::make_unique<PlaceholderCanonicalizer>());
+ phase.emplace_back(std::make_unique<RealDivCanonicalizer>());
+ phase.emplace_back(std::make_unique<ReluCanonicalizer>());
+ phase.emplace_back(std::make_unique<Relu6Canonicalizer>());
+ phase.emplace_back(std::make_unique<ReshapeCanonicalizer>());
+ phase.emplace_back(std::make_unique<RsqrtCanonicalizer>());
+ phase.emplace_back(std::make_unique<SoftmaxCanonicalizer>());
+ phase.emplace_back(std::make_unique<SqrtCanonicalizer>());
// NOTE SquaredDifference is handled in ResolveSquaredDifference
- phase.emplace_back(stdex::make_unique<SqueezeCanonicalizer>());
- phase.emplace_back(stdex::make_unique<StopGradientCanonicalizer>());
- phase.emplace_back(stdex::make_unique<SubCanonicalizer>());
- phase.emplace_back(stdex::make_unique<TanhCanonicalizer>());
+ phase.emplace_back(std::make_unique<SqueezeCanonicalizer>());
+ phase.emplace_back(std::make_unique<StopGradientCanonicalizer>());
+ phase.emplace_back(std::make_unique<SubCanonicalizer>());
+ phase.emplace_back(std::make_unique<TanhCanonicalizer>());
// For virtual nodes
- phase.emplace_back(stdex::make_unique<TFPushCanonicalizer>());
+ phase.emplace_back(std::make_unique<TFPushCanonicalizer>());
/* TRANSFORM DECLARATION END */
ProgressReporter prog(g, logo::PhaseStrategy::Restart);
#define __CODEC_HELPER_H__
#include <plier/tf/Convert.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
void set_feature_enc(loco::FeatureEncode *feature_enc, DataLayout data_layout)
{
- auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+ auto enc = std::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
if (data_layout == DataLayout::NHWC)
{
void set_feature_dec(loco::FeatureDecode *feature_dec, DataLayout data_layout)
{
- auto dec = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+ auto dec = std::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
if (data_layout == DataLayout::NHWC)
{
#include <loco/Service/ShapeInference.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/text_format.h>
+#include <memory>
#include <iostream>
#include <sstream>
#include <fstream>
for (const auto &custom_op : sig.customops())
{
std::unique_ptr<moco::tf::COpCallGraphBuilder> builder =
- stdex::make_unique<moco::tf::COpCallGraphBuilder>(&sig);
+ std::make_unique<moco::tf::COpCallGraphBuilder>(&sig);
registry.add(custom_op, std::move(builder));
}
auto input = graph->inputs()->at(n);
auto input_node = moco::placeholder_node(graph.get(), n);
assert(input_node != nullptr);
- input->shape(stdex::make_unique<loco::TensorShape>(tensor_shape(input_node)));
+ input->shape(std::make_unique<loco::TensorShape>(tensor_shape(input_node)));
}
for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
auto output = graph->outputs()->at(n);
auto output_node = moco::push_node(graph.get(), n);
assert(output_node != nullptr);
- output->shape(stdex::make_unique<loco::TensorShape>(::tensor_shape(output_node)));
+ output->shape(std::make_unique<loco::TensorShape>(::tensor_shape(output_node)));
}
// Convert graph to hold only Canonical dialect
namespace tf
{
-#define KNOB_BOOL(NAME, DEFAULT, DESC) \
- template <> typename KnobTrait<Knob::NAME>::ValueType get<Knob::NAME>(void) \
- { \
- static typename KnobTrait<Knob::NAME>::ValueType value = \
- ::knob_load<typename KnobTrait<Knob::NAME>::ValueType>(::knob_loader(), #NAME, DEFAULT); \
- return value; \
+#define KNOB_BOOL(NAME, DEFAULT, DESC) \
+ template <> typename KnobTrait<Knob::NAME>::ValueType get<Knob::NAME>(void) \
+ { \
+ static typename KnobTrait<Knob::NAME>::ValueType value = \
+ ::knob_load<typename KnobTrait<Knob::NAME>::ValueType>(::knob_loader(), #NAME, DEFAULT); \
+ return value; \
}
#include "Knob.lst"
#undef KNOB_BOOL
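// The KNOB_BOOL definition above is an X-macro: including "Knob.lst" while the macro is live
// stamps out one get<Knob::NAME>() specialization per listed knob, each caching its value in a
// function-local static on first use. Illustrative expansion only — the entry and default below
// are examples; the actual entries live in Knob.lst:
//
//   KNOB_BOOL(ConstantFolding, true, "Enable constant folding")
//
// expands to
//
//   template <> typename KnobTrait<Knob::ConstantFolding>::ValueType get<Knob::ConstantFolding>(void)
//   {
//     static typename KnobTrait<Knob::ConstantFolding>::ValueType value =
//       ::knob_load<typename KnobTrait<Knob::ConstantFolding>::ValueType>(::knob_loader(),
//                                                                         "ConstantFolding", true);
//     return value;
//   }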
FormattedGraph fmt(loco::Graph *g)
{
- auto node_summary_builder = stdex::make_unique<TFNodeSummaryBuilderFactory>();
+ auto node_summary_builder = std::make_unique<TFNodeSummaryBuilderFactory>();
return std::move(locop::fmt<locop::LinearV1>(g).with(std::move(node_summary_builder)));
}
#include <moco/Names.h>
#include <moco/tf/Frontend.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
+#include <memory>
#include <vector>
#include <cassert>
#include <stdexcept>
{
public:
COpCallGraphUpdate(locoex::COpCall *node, const std::vector<moco::TensorName> &input_names)
- : _node(node), _input_names(input_names)
+ : _node(node), _input_names(input_names)
{
}
if (val.value_case() == tensorflow::AttrValue::kF)
{
- call_node->attr(name, stdex::make_unique<locoex::COpAttrFloat>(val.f()));
+ call_node->attr(name, std::make_unique<locoex::COpAttrFloat>(val.f()));
}
else if (val.value_case() == tensorflow::AttrValue::kI)
{
- call_node->attr(name, stdex::make_unique<locoex::COpAttrInt>(val.i()));
+ call_node->attr(name, std::make_unique<locoex::COpAttrInt>(val.i()));
}
// TODO define more types
else
{
input_names.emplace_back(TensorName(tf_node.input(i)));
}
- auto update = stdex::make_unique<COpCallGraphUpdate>(call_node, input_names);
+ auto update = std::make_unique<COpCallGraphUpdate>(call_node, input_names);
updates->enroll(std::move(update));
}
class COpCallGraphBuilder final : public GraphBuilder
{
public:
- COpCallGraphBuilder(const ModelSignature *signature) : _signature(signature) { /* empty */}
+ COpCallGraphBuilder(const ModelSignature *signature) : _signature(signature)
+ { /* empty */
+ }
bool validate(const tensorflow::NodeDef &) const override;
void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
#include <loco.h>
#include <plier/tf/TestHelper.h>
-#include <stdex/Memory.h>
#include <gtest/gtest.h>
+#include <memory>
+
using namespace moco::tf::test;
namespace
// import
moco::GraphBuilderRegistry registry{&moco::GraphBuilderRegistry::get()};
- registry.add("new_custom_op", stdex::make_unique<moco::tf::COpCallGraphBuilder>(&signature));
+ registry.add("new_custom_op", std::make_unique<moco::tf::COpCallGraphBuilder>(&signature));
moco::Importer importer(&registry);
std::unique_ptr<loco::Graph> graph = importer.import(signature, graph_def);
#include <logo/Phase.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace moco
{
/* TRANSFORM DECLARATION BEGIN */
// Shape inference is required for ResolveRedundantReshape
- phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<ShapeInferencePass>());
if (moco::tf::get<moco::tf::Knob::ConstantFolding>())
{
- phase.emplace_back(stdex::make_unique<logo::ConstantFoldingPass>());
+ phase.emplace_back(std::make_unique<logo::ConstantFoldingPass>());
}
if (moco::tf::get<moco::tf::Knob::RemoveDeadNode>())
{
- phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());
}
if (moco::tf::get<moco::tf::Knob::ReorderDecode>() &&
moco::tf::get<moco::tf::Knob::ReorderDecodeTensorBiasAdd>())
{
- phase.emplace_back(stdex::make_unique<logo::ReorderDecodePass<loco::TensorBiasAdd>>());
+ phase.emplace_back(std::make_unique<logo::ReorderDecodePass<loco::TensorBiasAdd>>());
}
if (moco::tf::get<moco::tf::Knob::ReorderDecode>() &&
moco::tf::get<moco::tf::Knob::ReorderDecodeReLU>())
{
- phase.emplace_back(stdex::make_unique<logo::ReorderDecodePass<loco::ReLU>>());
+ phase.emplace_back(std::make_unique<logo::ReorderDecodePass<loco::ReLU>>());
}
if (moco::tf::get<moco::tf::Knob::SimplifyDomainConversion>())
{
- phase.emplace_back(stdex::make_unique<logo::SimplifyDomainConversionPass>());
+ phase.emplace_back(std::make_unique<logo::SimplifyDomainConversionPass>());
}
if (moco::tf::get<moco::tf::Knob::RemoveForwardNode>())
{
- phase.emplace_back(stdex::make_unique<logo::RemoveForwardNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveForwardNodePass>());
}
if (moco::tf::get<moco::tf::Knob::ResolveDuplicateReshape>())
{
- phase.emplace_back(stdex::make_unique<logo::ResolveDuplicateReshapePass>());
+ phase.emplace_back(std::make_unique<logo::ResolveDuplicateReshapePass>());
}
if (moco::tf::get<moco::tf::Knob::ResolveRedundantReshape>())
{
- phase.emplace_back(stdex::make_unique<logo::ResolveRedundantReshapePass>());
+ phase.emplace_back(std::make_unique<logo::ResolveRedundantReshapePass>());
}
/* TRANSFORM DECLARATION END */
{
public:
ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
- : _graph{graph}, _strategy{strategy}
+ : _graph{graph}, _strategy{strategy}
{
// DO NOTHING
}
#include <locop/FormattedGraph.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace moco
{
public:
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tlb) const final
{
- return stdex::make_unique<MocoNodeSummaryBuilder>(tlb);
+ return std::make_unique<MocoNodeSummaryBuilder>(tlb);
}
};
#include <logo/Phase.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace moco
{
/* TRANSFORM DECLARATION BEGIN */
if (moco::tf::get<moco::tf::Knob::ResolveFusedBatchNorm>())
{
- phase.emplace_back(stdex::make_unique<moco::ResolveFusedBatchNorm>());
+ phase.emplace_back(std::make_unique<moco::ResolveFusedBatchNorm>());
}
if (moco::tf::get<moco::tf::Knob::FuseBinaryIntoPreceding>())
{
- phase.emplace_back(stdex::make_unique<moco::FuseBinaryIntoPreceding>());
+ phase.emplace_back(std::make_unique<moco::FuseBinaryIntoPreceding>());
}
if (moco::tf::get<moco::tf::Knob::ResolveConstantShape>())
{
- phase.emplace_back(stdex::make_unique<moco::ResolveConstantShape>());
+ phase.emplace_back(std::make_unique<moco::ResolveConstantShape>());
}
if (moco::tf::get<moco::tf::Knob::ResolveReshapeWildcardDim>())
{
- phase.emplace_back(stdex::make_unique<moco::ResolveReshapeWildcardDim>());
+ phase.emplace_back(std::make_unique<moco::ResolveReshapeWildcardDim>());
}
if (moco::tf::get<moco::tf::Knob::ResolveSquaredDifference>())
{
- phase.emplace_back(stdex::make_unique<moco::ResolveSquaredDifference>());
+ phase.emplace_back(std::make_unique<moco::ResolveSquaredDifference>());
}
if (moco::tf::get<moco::tf::Knob::RemoveTFIdentityNode>())
{
- phase.emplace_back(stdex::make_unique<moco::RemoveTFIdentityNode>());
+ phase.emplace_back(std::make_unique<moco::RemoveTFIdentityNode>());
}
if (moco::tf::get<moco::tf::Knob::RemoveDeadNode>())
{
- phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());
}
if (moco::tf::get<moco::tf::Knob::SqueezeReduceNode>())
{
- phase.emplace_back(stdex::make_unique<moco::SqueezeReduceNode>());
+ phase.emplace_back(std::make_unique<moco::SqueezeReduceNode>());
}
// Shape inference is needed for added nodes doing above transformations
- phase.emplace_back(stdex::make_unique<moco::tf::ShapeInferencePass>());
- phase.emplace_back(stdex::make_unique<moco::tf::TypeInferencePass>());
+ phase.emplace_back(std::make_unique<moco::tf::ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<moco::tf::TypeInferencePass>());
/* TRANSFORM DECLARATION END */
ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
#include <moco/IR/Nodes/TFConst.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
TFNodeBuildTester::TFNodeBuildTester()
{
_graph = loco::make_graph();
- _tensor_names = stdex::make_unique<moco::SymbolTable>();
+ _tensor_names = std::make_unique<moco::SymbolTable>();
}
void TFNodeBuildTester::inputs(const std::vector<std::string> &names)
{
assert(_output != nullptr);
- auto node_defs = stdex::make_unique<moco::NodeDefTable>();
- auto updates = stdex::make_unique<moco::UpdateQueue>();
+ auto node_defs = std::make_unique<moco::NodeDefTable>();
+ auto updates = std::make_unique<moco::UpdateQueue>();
moco::GraphBuilderContext gb_context(_graph.get(), node_defs.get(), _tensor_names.get(),
updates.get());
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(TFDialect::get(), &tf_rule)
- .bind(locoex::COpDialect::get(), &cop_rule);
+ .bind(TFDialect::get(), &tf_rule)
+ .bind(locoex::COpDialect::get(), &cop_rule);
return loco::apply(&rules).to(graph);
}
loco::MultiDialectTypeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(TFDialect::get(), &tf_rule)
- .bind(locoex::COpDialect::get(), &cop_rule);
+ .bind(TFDialect::get(), &tf_rule)
+ .bind(locoex::COpDialect::get(), &cop_rule);
loco::apply(&rules).to(graph);
target_include_directories(moco_import PUBLIC include)
target_link_libraries(moco_import PUBLIC moco_lang)
target_link_libraries(moco_import PUBLIC mio_tf)
-target_link_libraries(moco_import PUBLIC stdex)
target_link_libraries(moco_import PRIVATE nncc_common)
target_link_libraries(moco_import PRIVATE plier_tf)
target_link_libraries(moco_import PRIVATE oops)
public:
GraphBuilderContext(loco::Graph *g, NodeDefTable *nodedef, SymbolTable *tensor_names,
UpdateQueue *updates)
- : _g(g), _nodedef(nodedef), _tensor_names(tensor_names), _updates(updates)
+ : _g(g), _nodedef(nodedef), _tensor_names(tensor_names), _updates(updates)
{
// DO NOTHING
}
std::map<const std::string, std::unique_ptr<GraphBuilder>> _builder_map;
};
-} // namespace mono
+} // namespace moco
#endif // __MOCO_IMPORT_GRAPH_BUILDER_REGISTRY_H__
{
/**
-* @brief GraphBuilder for Softmax node
-*/
+ * @brief GraphBuilder for Softmax node
+ */
class SoftmaxGraphBuilder final : public GraphBuilder
{
public:
#include "moco/Import/GraphBuilderRegistry.h"
#include "moco/Import/Nodes.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace moco
{
GraphBuilderRegistry::GraphBuilderRegistry()
{
- add("Add", stdex::make_unique<AddGraphBuilder>());
- add("AvgPool", stdex::make_unique<AvgPoolGraphBuilder>());
- add("BiasAdd", stdex::make_unique<BiasAddGraphBuilder>());
- add("ConcatV2", stdex::make_unique<ConcatV2GraphBuilder>());
- add("Const", stdex::make_unique<ConstGraphBuilder>());
- add("Conv2D", stdex::make_unique<Conv2DGraphBuilder>());
- add("Conv2DBackpropInput", stdex::make_unique<Conv2DBackpropInputGraphBuilder>());
- add("DepthwiseConv2dNative", stdex::make_unique<DepthwiseConv2dNativeGraphBuilder>());
- add("FakeQuantWithMinMaxVars", stdex::make_unique<FakeQuantWithMinMaxVarsGraphBuilder>());
- add("FusedBatchNorm", stdex::make_unique<FusedBatchNormGraphBuilder>());
- add("Identity", stdex::make_unique<IdentityGraphBuilder>());
- add("Maximum", stdex::make_unique<MaximumGraphBuilder>());
- add("MaxPool", stdex::make_unique<MaxPoolGraphBuilder>());
- add("Mean", stdex::make_unique<MeanGraphBuilder>());
- add("Mul", stdex::make_unique<MulGraphBuilder>());
- add("Pack", stdex::make_unique<PackGraphBuilder>());
- add("Pad", stdex::make_unique<PadGraphBuilder>());
- add("Placeholder", stdex::make_unique<PlaceholderGraphBuilder>());
- add("RealDiv", stdex::make_unique<RealDivGraphBuilder>());
- add("Relu", stdex::make_unique<ReluGraphBuilder>());
- add("Relu6", stdex::make_unique<Relu6GraphBuilder>());
- add("Reshape", stdex::make_unique<ReshapeGraphBuilder>());
- add("Rsqrt", stdex::make_unique<RsqrtGraphBuilder>());
- add("Shape", stdex::make_unique<ShapeGraphBuilder>());
- add("Softmax", stdex::make_unique<SoftmaxGraphBuilder>());
- add("Sqrt", stdex::make_unique<SqrtGraphBuilder>());
- add("SquaredDifference", stdex::make_unique<SquaredDifferenceGraphBuilder>());
- add("Squeeze", stdex::make_unique<SqueezeGraphBuilder>());
- add("StopGradient", stdex::make_unique<StopGradientGraphBuilder>());
- add("StridedSlice", stdex::make_unique<StridedSliceGraphBuilder>());
- add("Sub", stdex::make_unique<SubGraphBuilder>());
- add("Tanh", stdex::make_unique<TanhGraphBuilder>());
+ add("Add", std::make_unique<AddGraphBuilder>());
+ add("AvgPool", std::make_unique<AvgPoolGraphBuilder>());
+ add("BiasAdd", std::make_unique<BiasAddGraphBuilder>());
+ add("ConcatV2", std::make_unique<ConcatV2GraphBuilder>());
+ add("Const", std::make_unique<ConstGraphBuilder>());
+ add("Conv2D", std::make_unique<Conv2DGraphBuilder>());
+ add("Conv2DBackpropInput", std::make_unique<Conv2DBackpropInputGraphBuilder>());
+ add("DepthwiseConv2dNative", std::make_unique<DepthwiseConv2dNativeGraphBuilder>());
+ add("FakeQuantWithMinMaxVars", std::make_unique<FakeQuantWithMinMaxVarsGraphBuilder>());
+ add("FusedBatchNorm", std::make_unique<FusedBatchNormGraphBuilder>());
+ add("Identity", std::make_unique<IdentityGraphBuilder>());
+ add("Maximum", std::make_unique<MaximumGraphBuilder>());
+ add("MaxPool", std::make_unique<MaxPoolGraphBuilder>());
+ add("Mean", std::make_unique<MeanGraphBuilder>());
+ add("Mul", std::make_unique<MulGraphBuilder>());
+ add("Pack", std::make_unique<PackGraphBuilder>());
+ add("Pad", std::make_unique<PadGraphBuilder>());
+ add("Placeholder", std::make_unique<PlaceholderGraphBuilder>());
+ add("RealDiv", std::make_unique<RealDivGraphBuilder>());
+ add("Relu", std::make_unique<ReluGraphBuilder>());
+ add("Relu6", std::make_unique<Relu6GraphBuilder>());
+ add("Reshape", std::make_unique<ReshapeGraphBuilder>());
+ add("Rsqrt", std::make_unique<RsqrtGraphBuilder>());
+ add("Shape", std::make_unique<ShapeGraphBuilder>());
+ add("Softmax", std::make_unique<SoftmaxGraphBuilder>());
+ add("Sqrt", std::make_unique<SqrtGraphBuilder>());
+ add("SquaredDifference", std::make_unique<SquaredDifferenceGraphBuilder>());
+ add("Squeeze", std::make_unique<SqueezeGraphBuilder>());
+ add("StopGradient", std::make_unique<StopGradientGraphBuilder>());
+ add("StridedSlice", std::make_unique<StridedSliceGraphBuilder>());
+ add("Sub", std::make_unique<SubGraphBuilder>());
+ add("Tanh", std::make_unique<TanhGraphBuilder>());
// Virtual node like `TFPush` need not to be added here
}
#include <moco/IR/Nodes/TFPlaceholder.h>
#include <moco/IR/TFNode.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <sstream>
#include <stdexcept>
void convert_graph(const moco::GraphBuilderSource &source, const moco::ModelSignature &signature,
tensorflow::GraphDef &tf_graph_def, loco::Graph *graph)
{
- auto nodedef = stdex::make_unique<moco::NodeDefTable>();
- auto tensor_names = stdex::make_unique<moco::SymbolTable>();
- auto updates = stdex::make_unique<moco::UpdateQueue>();
+ auto nodedef = std::make_unique<moco::NodeDefTable>();
+ auto tensor_names = std::make_unique<moco::SymbolTable>();
+ auto updates = std::make_unique<moco::UpdateQueue>();
moco::GraphBuilderContext gb_context(graph, nodedef.get(), tensor_names.get(), updates.get());
#include <moco/IR/Nodes/TFAdd.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
add_input_names.push_back(TensorName(node.input(0))); // x
add_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_add_update = stdex::make_unique<TFAddGraphUpdate>(tf_add, add_input_names);
+ auto tf_add_update = std::make_unique<TFAddGraphUpdate>(tf_add, add_input_names);
updates->enroll(std::move(tf_add_update));
}
#include "Convert.h"
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
{
public:
TFAvgPoolGraphUpdate(TFAvgPool *node, const TensorName &name)
- : _avgpool_node(node), _value_name(name)
+ : _avgpool_node(node), _value_name(name)
{
}
tensor_names->enroll(output_name, avgPool_node);
// Record ifm inputs to featureEncode_node
- auto update = stdex::make_unique<TFAvgPoolGraphUpdate>(avgPool_node, TensorName(node.input(0)));
+ auto update = std::make_unique<TFAvgPoolGraphUpdate>(avgPool_node, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
#include <loco.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <vector>
{
public:
TFBiasAddGraphUpdate(TFBiasAdd *biasadd, std::vector<TensorName> &names)
- : _biasadd(biasadd), _names(names)
+ : _biasadd(biasadd), _names(names)
{
}
input_names.push_back(TensorName(node.input(0)));
input_names.push_back(TensorName(node.input(1)));
- auto update = stdex::make_unique<TFBiasAddGraphUpdate>(tf_bias_add, input_names);
+ auto update = std::make_unique<TFBiasAddGraphUpdate>(tf_bias_add, input_names);
updates->enroll(std::move(update));
}
#include <moco/Names.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
#include <cassert>
namespace
{
public:
TFConcatV2GraphUpdate(TFConcatV2 *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, concat_node);
- auto update = stdex::make_unique<TFConcatV2GraphUpdate>(concat_node, input_names);
+ auto update = std::make_unique<TFConcatV2GraphUpdate>(concat_node, input_names);
updates->enroll(std::move(update));
}
read_value_float32(const_node, num_elements, input_tensor);
break;
- // TODO support other types
+ // TODO support other types
default:
assert(false);
#include <loco.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
#include <algorithm>
input_names.push_back(TensorName(node.input(1))); // kernel
// Record ifm inputs to featureEncode_node
- auto tfconv2d_update = stdex::make_unique<TFConv2DGraphUpdate>(conv2d, input_names);
+ auto tfconv2d_update = std::make_unique<TFConv2DGraphUpdate>(conv2d, input_names);
updates->enroll(std::move(tfconv2d_update));
}
#include "Convert.h"
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
+
namespace
{
using namespace moco;
{
public:
Conv2DBackpropInputGraphUpdate(TFConv2DBackpropInput *node, std::vector<TensorName> names)
- : _node(node), _input_names(names)
+ : _node(node), _input_names(names)
{
// DO NOTHING
}
// update
auto conv2d_backprop_update =
- stdex::make_unique<Conv2DBackpropInputGraphUpdate>(conv2d_backprop, input_names);
+ std::make_unique<Conv2DBackpropInputGraphUpdate>(conv2d_backprop, input_names);
updates->enroll(std::move(conv2d_backprop_update));
}
#include <plier/tf/Convert.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
using namespace plier::tf;
{
public:
TFDepthwiseConv2dNativeGraphUpdate(TFDepthwiseConv2dNative *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
input_names.push_back(TensorName(node.input(1))); // kernel
// Record ifm inputs to featureEncode_node
- auto tfdepthwiseconv2dnative_update = stdex::make_unique<TFDepthwiseConv2dNativeGraphUpdate>(
- depthwiseconv2d_native_node, input_names);
+ auto tfdepthwiseconv2dnative_update =
+ std::make_unique<TFDepthwiseConv2dNativeGraphUpdate>(depthwiseconv2d_native_node, input_names);
updates->enroll(std::move(tfdepthwiseconv2dnative_update));
}
#include <plier/tf/Convert.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
using namespace plier::tf;
public:
TFFakeQuantWithMinMaxVarsGraphUpdate(TFFakeQuantWithMinMaxVars *node,
std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
// Record ifm inputs to featureEncode_node
auto tffakequant_update =
- stdex::make_unique<TFFakeQuantWithMinMaxVarsGraphUpdate>(fakequant_node, input_names);
+ std::make_unique<TFFakeQuantWithMinMaxVarsGraphUpdate>(fakequant_node, input_names);
updates->enroll(std::move(tffakequant_update));
}
#include <moco/IR/Nodes/TFFusedBatchNorm.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
{
public:
FusedBatchNormGraphUpdate(TFFusedBatchNorm *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
fbn_input_names.push_back(TensorName(node.input(3))); // mean
fbn_input_names.push_back(TensorName(node.input(4))); // variance
- auto tf_fbn_update = stdex::make_unique<FusedBatchNormGraphUpdate>(tf_fbn, fbn_input_names);
+ auto tf_fbn_update = std::make_unique<FusedBatchNormGraphUpdate>(tf_fbn, fbn_input_names);
updates->enroll(std::move(tf_fbn_update));
}
#include <moco/Names.h>
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <vector>
namespace
{
public:
TFIdentityGraphUpdate(TFIdentity *node, const std::vector<TensorName> &names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
{
names.emplace_back(TensorName(node.input(i)));
}
- auto update = stdex::make_unique<TFIdentityGraphUpdate>(identity_node, names);
+ auto update = std::make_unique<TFIdentityGraphUpdate>(identity_node, names);
updates->enroll(std::move(update));
}
#include <loco.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
{
public:
TFMaxPoolGraphUpdate(TFMaxPool *node, const TensorName &name)
- : _maxpool_node(node), _input_name(name)
+ : _maxpool_node(node), _input_name(name)
{
}
tensor_names->enroll(output_name, maxPool_node);
// Record ifm inputs to featureEncode_node
- auto update = stdex::make_unique<TFMaxPoolGraphUpdate>(maxPool_node, TensorName(node.input(0)));
+ auto update = std::make_unique<TFMaxPoolGraphUpdate>(maxPool_node, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
#include <moco/IR/Nodes/TFMaximum.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
add_input_names.push_back(TensorName(node.input(0))); // x
add_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_maximum_update = stdex::make_unique<TFMaximumGraphUpdate>(tf_maximum, add_input_names);
+ auto tf_maximum_update = std::make_unique<TFMaximumGraphUpdate>(tf_maximum, add_input_names);
updates->enroll(std::move(tf_maximum_update));
}
#include <moco/IR/Nodes/TFMean.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
using namespace moco;
public:
MeanGraphUpdate(TFMean *node, const TensorName &&input_name,
const TensorName &&reduction_indices_name)
- : _node(node), _input_name(input_name), _reduction_indices_name(reduction_indices_name)
+ : _node(node), _input_name(input_name), _reduction_indices_name(reduction_indices_name)
{
// DO NOTHING
}
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, tf_mean);
- auto update = stdex::make_unique<MeanGraphUpdate>(tf_mean, TensorName(node.input(0)),
- TensorName(node.input(1)));
+ auto update = std::make_unique<MeanGraphUpdate>(tf_mean, TensorName(node.input(0)),
+ TensorName(node.input(1)));
updates->enroll(std::move(update));
}
#include <moco/IR/Nodes/TFMul.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
add_input_names.push_back(TensorName(node.input(0))); // x
add_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_mul_update = stdex::make_unique<TFMulGraphUpdate>(tf_mul, add_input_names);
+ auto tf_mul_update = std::make_unique<TFMulGraphUpdate>(tf_mul, add_input_names);
updates->enroll(std::move(tf_mul_update));
}
#include <loco.h>
#include <loco/IR/NodeShape.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
#include <cassert>
namespace
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, pack_node);
- auto update = stdex::make_unique<TFPackGraphUpdate>(pack_node, input_names);
+ auto update = std::make_unique<TFPackGraphUpdate>(pack_node, input_names);
updates->enroll(std::move(update));
}
#include <moco/IR/Nodes/TFPad.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
add_input_names.push_back(TensorName(node.input(1))); // paddings
// Queue node input update
- auto tf_pad_update = stdex::make_unique<TFPadGraphUpdate>(tf_pad, add_input_names);
+ auto tf_pad_update = std::make_unique<TFPadGraphUpdate>(tf_pad, add_input_names);
updates->enroll(std::move(tf_pad_update));
}
#include <moco/IR/Nodes/TFRealDiv.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
div_input_names.push_back(TensorName(node.input(0))); // x
div_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_div_update = stdex::make_unique<TFRealDivGraphUpdate>(tf_div, div_input_names);
+ auto tf_div_update = std::make_unique<TFRealDivGraphUpdate>(tf_div, div_input_names);
updates->enroll(std::move(tf_div_update));
}
#include <moco/Names.h>
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
tensor_names->enroll(output_name, relu_node);
// Queue node input update
- auto update = stdex::make_unique<TFReluGraphUpdate>(relu_node, TensorName(node.input(0)));
+ auto update = std::make_unique<TFReluGraphUpdate>(relu_node, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
#include <moco/IR/Nodes/TFRelu6.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
tensor_names->enroll(output_name, relu_node);
// Queue node input update
- auto update = stdex::make_unique<TFRelu6GraphUpdate>(relu_node, TensorName(node.input(0)));
+ auto update = std::make_unique<TFRelu6GraphUpdate>(relu_node, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
#include <moco/Names.h>
#include <plier/tf/Convert.h>
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
input_names.push_back(TensorName(node.input(1))); // shape
// Queue node input update
- auto update = stdex::make_unique<ReshapeGraphUpdate>(reshape, input_names);
+ auto update = std::make_unique<ReshapeGraphUpdate>(reshape, input_names);
updates->enroll(std::move(update));
}
#include <moco/IR/Nodes/TFRsqrt.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
tensor_names->enroll(output_name, tf_rsqrt);
// Queue node input update
- auto tf_rsqrt_update =
- stdex::make_unique<TFRsqrtGraphUpdate>(tf_rsqrt, TensorName(node.input(0)));
+ auto tf_rsqrt_update = std::make_unique<TFRsqrtGraphUpdate>(tf_rsqrt, TensorName(node.input(0)));
updates->enroll(std::move(tf_rsqrt_update));
}
#include <moco/IR/Nodes/TFShape.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
using namespace moco;
{
public:
ShapeGraphUpdate(TFShape *node, const TensorName &&input_name)
- : _node(node), _input_name(input_name)
+ : _node(node), _input_name(input_name)
{
// DO NOTHING
}
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, tf_shape);
- auto update = stdex::make_unique<ShapeGraphUpdate>(tf_shape, TensorName(node.input(0)));
+ auto update = std::make_unique<ShapeGraphUpdate>(tf_shape, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
#include <moco/IR/Nodes/TFSoftmax.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
using namespace moco;
/**
-* @brief GraphUpdate for Softmax node
-*/
+ * @brief GraphUpdate for Softmax node
+ */
class SoftmaxGraphUpdate final : public GraphUpdate
{
public:
SoftmaxGraphUpdate(TFSoftmax *node, const TensorName &&input_name)
- : _node(node), _input_name(input_name)
+ : _node(node), _input_name(input_name)
{
// DO NOTHING
}
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, tf_softmax);
- auto update = stdex::make_unique<SoftmaxGraphUpdate>(tf_softmax, TensorName(node.input(0)));
+ auto update = std::make_unique<SoftmaxGraphUpdate>(tf_softmax, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
#include <moco/IR/Nodes/TFSqrt.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
tensor_names->enroll(output_name, tf_sqrt);
// Queue node input update
- auto tf_sqrt_update = stdex::make_unique<TFSqrtGraphUpdate>(tf_sqrt, TensorName(node.input(0)));
+ auto tf_sqrt_update = std::make_unique<TFSqrtGraphUpdate>(tf_sqrt, TensorName(node.input(0)));
updates->enroll(std::move(tf_sqrt_update));
}
#include <moco/IR/Nodes/TFSquaredDifference.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
{
public:
TFSquaredDifferenceGraphUpdate(TFSquaredDifference *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
// Queue node input update
auto tf_sqrt_update =
- stdex::make_unique<TFSquaredDifferenceGraphUpdate>(tf_sqdiff, add_input_names);
+ std::make_unique<TFSquaredDifferenceGraphUpdate>(tf_sqdiff, add_input_names);
updates->enroll(std::move(tf_sqrt_update));
}
#include <moco/Names.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
+
namespace
{
using namespace moco;
{
public:
SqueezeGraphUpdate(TFSqueeze *node, const TensorName &&input_name)
- : _node(node), _input_name(input_name)
+ : _node(node), _input_name(input_name)
{
// DO NOTHING
}
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, tf_squeeze);
- auto update = stdex::make_unique<SqueezeGraphUpdate>(tf_squeeze, TensorName(node.input(0)));
+ auto update = std::make_unique<SqueezeGraphUpdate>(tf_squeeze, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
#include <loco.h>
#include <plier/tf/Convert.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
// Queue node input update
auto tf_stopgradient_update =
- stdex::make_unique<TFStopGradientGraphUpdate>(tf_stopgradient, TensorName(node.input(0)));
+ std::make_unique<TFStopGradientGraphUpdate>(tf_stopgradient, TensorName(node.input(0)));
updates->enroll(std::move(tf_stopgradient_update));
}
#include "Convert.h"
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
+
namespace
{
using namespace moco;
{
public:
TFStridedSliceGraphUpdate(TFStridedSlice *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
input_names.push_back(TensorName(node.input(2))); // end
input_names.push_back(TensorName(node.input(3))); // strides
- auto tfconv2d_update = stdex::make_unique<TFStridedSliceGraphUpdate>(stridedslice, input_names);
+ auto tfconv2d_update = std::make_unique<TFStridedSliceGraphUpdate>(stridedslice, input_names);
updates->enroll(std::move(tfconv2d_update));
}
#include <moco/IR/Nodes/TFSub.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
sub_input_names.push_back(TensorName(node.input(0))); // x
sub_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_sub_update = stdex::make_unique<TFSubGraphUpdate>(tf_sub, sub_input_names);
+ auto tf_sub_update = std::make_unique<TFSubGraphUpdate>(tf_sub, sub_input_names);
updates->enroll(std::move(tf_sub_update));
}
#include <moco/IR/Nodes/TFTanh.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
tensor_names->enroll(output_name, tf_tanh);
// Queue node input update
- auto tf_tanh_update = stdex::make_unique<TFTanhGraphUpdate>(tf_tanh, TensorName(node.input(0)));
+ auto tf_tanh_update = std::make_unique<TFTanhGraphUpdate>(tf_tanh, TensorName(node.input(0)));
updates->enroll(std::move(tf_tanh_update));
}
#include "TestHelper.h"
#include <moco/IR/Nodes/TFConst.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
TFNodeBuildTester::TFNodeBuildTester()
{
_graph = loco::make_graph();
- _tensor_names = stdex::make_unique<moco::SymbolTable>();
+ _tensor_names = std::make_unique<moco::SymbolTable>();
}
void TFNodeBuildTester::inputs(const std::vector<std::string> &names)
{
assert(_output != nullptr);
- auto node_defs = stdex::make_unique<moco::NodeDefTable>();
- auto updates = stdex::make_unique<moco::UpdateQueue>();
+ auto node_defs = std::make_unique<moco::NodeDefTable>();
+ auto updates = std::make_unique<moco::UpdateQueue>();
moco::GraphBuilderContext gb_context(_graph.get(), node_defs.get(), _tensor_names.get(),
updates.get());
target_include_directories(moco_lang PUBLIC include)
target_link_libraries(moco_lang PUBLIC loco)
target_link_libraries(moco_lang PRIVATE nncc_common)
-target_link_libraries(moco_lang PRIVATE stdex)
install(TARGETS moco_lang DESTINATION lib) # moco_tf_frontend requires moco_lang
if(NOT ENABLE_TEST)
* Note that this convention is against loco canonical's convention.
*/
class TFConv2DBackpropInput final
- : public FixedArityNode<3, TFNodeImpl<TFOpcode::Conv2DBackpropInput>>
+ : public FixedArityNode<3, TFNodeImpl<TFOpcode::Conv2DBackpropInput>>
{
public:
loco::Node *input_sizes(void) const { return at(0)->node(); }
{
class TFDepthwiseConv2dNative final
- : public FixedArityNode<2, TFNodeImpl<TFOpcode::DepthwiseConv2dNative>>
+ : public FixedArityNode<2, TFNodeImpl<TFOpcode::DepthwiseConv2dNative>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
{
class TFFakeQuantWithMinMaxVars final
- : public FixedArityNode<3, TFNodeImpl<TFOpcode::FakeQuantWithMinMaxVars>>
+ : public FixedArityNode<3, TFNodeImpl<TFOpcode::FakeQuantWithMinMaxVars>>
{
public:
loco::Node *inputs(void) const { return at(0)->node(); }
#include <loco/IR/GraphInputIndex.h>
#include <loco/IR/GraphOutputIndex.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <stdexcept>
TFDialect::TFDialect()
{
- service<loco::GraphInputIndexQueryService>(stdex::make_unique<GiiQueryServiceImpl>());
- service<loco::GraphOutputIndexQueryService>(stdex::make_unique<GoiQueryServiceImpl>());
+ service<loco::GraphInputIndexQueryService>(std::make_unique<GiiQueryServiceImpl>());
+ service<loco::GraphOutputIndexQueryService>(std::make_unique<GoiQueryServiceImpl>());
}
loco::Dialect *TFDialect::get(void)
#include "moco/IR/TFNode.h"
#include "moco/IR/TFDialect.h"
+#include <memory>
#include <cassert>
namespace moco
} // namespace moco
-// TODO move this to appropriate place
-#include <stdex/Memory.h>
-
namespace moco
{
void index(TFPlaceholder *node, const loco::GraphInputIndex index)
{
- node->annot(stdex::make_unique<GraphInputIndexAnnotation>(index));
+ node->annot(std::make_unique<GraphInputIndexAnnotation>(index));
}
loco::TensorShape tensor_shape(const TFPlaceholder *node)
target_link_libraries(moco_pass PUBLIC logo_core)
target_link_libraries(moco_pass PUBLIC moco_lang)
target_link_libraries(moco_pass PRIVATE moco_support)
-target_link_libraries(moco_pass PRIVATE stdex)
target_link_libraries(moco_pass PRIVATE oops)
install(TARGETS moco_pass DESTINATION lib)
target_include_directories(moco_pass_test PRIVATE src)
target_link_libraries(moco_pass_test moco_pass)
target_link_libraries(moco_pass_test moco_support)
-target_link_libraries(moco_pass_test stdex)
/**
* @brief Constant folder for Const + Mul -> Const
-*/
+ */
class ConstantFoldMul : public logo::Pass
{
public:
/**
* @brief Constant folder for Const + Pack -> Const
-*/
+ */
class ConstantFoldPack : public logo::Pass
{
public:
/**
* @brief Constant folder for Const + StridedSlice -> Const
-*/
+ */
class ConstantFoldStridedSlice : public logo::Pass
{
public:
/**
* @brief Fuse TFAdd, TFMul to preceding TFConv2D or TFDepthWiseConv2D
-*/
+ */
class FuseBinaryIntoPreceding : public logo::Pass
{
public:
/**
* @brief Transform TFFusedBatchNorm into TFAdd + TFRsqrt + TFMul + TFBatchNorm
-*/
+ */
class ResolveFusedBatchNorm : public logo::Pass
{
public:
#include <moco/IR/TFNodes.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
}
setup_output_node(&graph, add_node);
- auto pass = stdex::make_unique<moco::ConstantFoldAdd>();
+ auto pass = std::make_unique<moco::ConstantFoldAdd>();
bool cont = true;
while (cont)
{
}
setup_output_node(&graph, add_node);
- auto pass = stdex::make_unique<moco::ConstantFoldAdd>();
+ auto pass = std::make_unique<moco::ConstantFoldAdd>();
bool cont = true;
while (cont)
{
for (uint32_t e = 0; e < nume; e++)
{
output->at<loco::DataType::S32>(e) =
- f.apply(lhs->at<loco::DataType::S32>(e), rhs->at<loco::DataType::S32>(e));
+ f.apply(lhs->at<loco::DataType::S32>(e), rhs->at<loco::DataType::S32>(e));
}
}
for (uint32_t e = 0; e < nume; e++)
{
output->at<loco::DataType::FLOAT32>(e) =
- f.apply(lhs->at<loco::DataType::FLOAT32>(e), rhs->at<loco::DataType::FLOAT32>(e));
+ f.apply(lhs->at<loco::DataType::FLOAT32>(e), rhs->at<loco::DataType::FLOAT32>(e));
}
}
#include <moco/IR/TFNodes.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
}
setup_output_node(&graph, mul_node);
- auto pass = stdex::make_unique<moco::ConstantFoldMul>();
+ auto pass = std::make_unique<moco::ConstantFoldMul>();
bool cont = true;
while (cont)
{
}
setup_output_node(&graph, mul_node);
- auto pass = stdex::make_unique<moco::ConstantFoldMul>();
+ auto pass = std::make_unique<moco::ConstantFoldMul>();
bool cont = true;
while (cont)
{
#include <moco/IR/TFNodes.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
identity->input(pack_node);
setup_output_node(&graph, identity);
- auto pass = stdex::make_unique<moco::ConstantFoldPack>();
+ auto pass = std::make_unique<moco::ConstantFoldPack>();
bool cont = true;
while (cont)
{
#include <moco/IR/TFNodes.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
}
setup_output_node(&graph, sslice_node);
- auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ auto pass = std::make_unique<moco::ConstantFoldStridedSlice>();
bool cont = true;
while (cont)
{
}
setup_output_node(&graph, sslice_node);
- auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ auto pass = std::make_unique<moco::ConstantFoldStridedSlice>();
bool cont = true;
while (cont)
{
}
setup_output_node(&graph, sslice_node);
- auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ auto pass = std::make_unique<moco::ConstantFoldStridedSlice>();
bool cont = true;
while (cont)
{
}
setup_output_node(&graph, sslice_node);
- auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ auto pass = std::make_unique<moco::ConstantFoldStridedSlice>();
bool cont = true;
while (cont)
{
fused_node = fused_conv_node<FuseType::Conv2D, moco::TFConv2D>(graph, mulparam, conv2d);
else if (auto dw_conv2d = dynamic_cast<moco::TFDepthwiseConv2dNative *>(precedingOp))
fused_node = fused_conv_node<FuseType::DepthwiseConv2D, moco::TFDepthwiseConv2dNative>(
- graph, mulparam, dw_conv2d);
+ graph, mulparam, dw_conv2d);
// Not ready yet
if (fused_node == nullptr)
}
}
{
- // TODO support Div
+ // TODO support Div
}
{
#include <loco/IR/NodeShape.h>
#include <loco/Service/ShapeInference.h>
-#include <stdex/Memory.h>
-
namespace
{
require("loco")
require("locop")
-require("stdex")
require("moco-log")
require("plier-tf")
require("mio-tf")
target_link_libraries(moco_service PUBLIC moco_lang)
target_link_libraries(moco_service PRIVATE moco_support)
target_link_libraries(moco_service PRIVATE nncc_common)
-target_link_libraries(moco_service PRIVATE stdex)
target_link_libraries(moco_service PRIVATE oops)
install(TARGETS moco_service DESTINATION lib)
// output count is from input count, depth is from kernel 'CM' which is dim(2) * dim(3)
auto output_feature_shape = input_feature_shape;
output_feature_shape.depth() =
- loco::Dimension(ker_tensor_shape.dim(2).value() * ker_tensor_shape.dim(3).value());
+ loco::Dimension(ker_tensor_shape.dim(2).value() * ker_tensor_shape.dim(3).value());
auto output_plane_shape = infer_plane_shape(input_plane_shape);
if (_padding == "VALID")
{
res.height =
- (p.input.height.value() + p.stride.height.value() - p.effective_window.height.value()) /
- p.stride.height.value();
+ (p.input.height.value() + p.stride.height.value() - p.effective_window.height.value()) /
+ p.stride.height.value();
res.width =
- (p.input.width.value() + p.stride.width.value() - p.effective_window.width.value()) /
- p.stride.width.value();
+ (p.input.width.value() + p.stride.width.value() - p.effective_window.width.value()) /
+ p.stride.width.value();
}
else if (_padding == "SAME")
{
{
public:
AddNode(const std::shared_ptr<expr::Node> &lhs, const std::shared_ptr<expr::Node> &rhs)
- : _lhs{lhs}, _rhs{rhs}
+ : _lhs{lhs}, _rhs{rhs}
{
// DO NOTHING
}
public:
template <typename... Args>
DerefNode(const DomainID &id, Args &&... indicies)
- : _id{id}, _sub{std::forward<Args>(indicies)...}
+ : _id{id}, _sub{std::forward<Args>(indicies)...}
{
// DO NOTHING
}
{
public:
MulNode(const std::shared_ptr<expr::Node> &lhs, const std::shared_ptr<expr::Node> &rhs)
- : _lhs{lhs}, _rhs{rhs}
+ : _lhs{lhs}, _rhs{rhs}
{
// DO NOTHING
}
{
// Dummy Node for testing
};
-}
+} // namespace
TEST(BLOCK, use_case_1)
{
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(Closure, ctor)
{
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(EXPR, operator_sum)
{
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(RET, ctor)
{
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(ADD_NODE, cast)
{
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(DEREF_NODE, cast)
{
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(MUL_NODE, cast)
{
struct DummyExprNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(STMT_PUSH_NODE, cast)
{
namespace fs = boost::filesystem;
AclCppCodeGenerator::AclCppCodeGenerator(string output_dir, string artifact_name)
- : _output_dir(std::move(output_dir)), _artifact_name(std::move(artifact_name))
+ : _output_dir(std::move(output_dir)), _artifact_name(std::move(artifact_name))
{
}
using namespace mir;
AclCppOpGenerator::AclCppOpGenerator(const string &name, ostream &par_out)
- : _parOut(par_out), _module(name), _constrBlock(nullptr), _infBlock(nullptr),
- _clScheduler(AF::id("arm_compute::CLScheduler"))
+ : _parOut(par_out), _module(name), _constrBlock(nullptr), _infBlock(nullptr),
+ _clScheduler(AF::id("arm_compute::CLScheduler"))
{
}
_parInVar = _artifactClass->var(false, "std::ifstream", "_parIn");
_parIn = _parInVar->use();
string par_file_name = _module.name() + ".par";
- _constrBlock->call("open", {AF::lit("\"" + par_file_name + "\""),
- AF::lit("std::ios_base::in | std::ios_base::binary")},
- _parIn);
+ _constrBlock->call(
+ "open",
+ {AF::lit("\"" + par_file_name + "\""), AF::lit("std::ios_base::in | std::ios_base::binary")},
+ _parIn);
auto file_fail = _constrBlock->ifCond(AF::call("fail", {}, _parIn));
auto file_fail_block = file_fail->getBlock();
file_fail_block->addStatement(
- AF::lit("throw std::string(\"Failed to open file: " + par_file_name + " for reading\")"));
+ AF::lit("throw std::string(\"Failed to open file: " + par_file_name + " for reading\")"));
// Traverse the computational graph.
g->accept(this);
const auto *ir_output = op.getOutput(0);
static const char *axis_names[] = {
- "arm_compute::DataLayoutDimension::BATCHES", "arm_compute::DataLayoutDimension::CHANNEL",
- "arm_compute::DataLayoutDimension::HEIGHT", "arm_compute::DataLayoutDimension::WIDTH"};
+ "arm_compute::DataLayoutDimension::BATCHES", "arm_compute::DataLayoutDimension::CHANNEL",
+ "arm_compute::DataLayoutDimension::HEIGHT", "arm_compute::DataLayoutDimension::WIDTH"};
int axis = op.getAxis();
assert(axis >= 0 && axis < static_cast<int>(sizeof(axis_names) / sizeof(axis_names[0])) &&
for (const Operation::Output *ir_input : ir_inputs)
_constrBlock->call("push_back", {AF::ref(AF::id(tensorName(ir_input)))}, inputs);
- auto layer = genLayer("arm_compute::CLConcatenateLayer", prefix,
- {inputs, AF::ref(out), AF::lit(axis_name)});
+ auto layer =
+ genLayer("arm_compute::CLConcatenateLayer", prefix, {inputs, AF::ref(out), AF::lit(axis_name)});
addToPersistentTensors(out);
genLayerExecution(layer);
string var_name = prefix + "_pad_stride_info";
list<std::shared_ptr<ArtifactExpr>> var_init_params = {
- AF::lit(to_string(strides.dim(1))),
- AF::lit(to_string(strides.dim(0))),
- AF::lit(to_string(padding_before.at(1))),
- AF::lit(to_string(padding_after.at(1))),
- AF::lit(to_string(padding_before.at(0))),
- AF::lit(to_string(padding_after.at(0))),
- AF::lit("arm_compute::DimensionRoundingType::FLOOR")};
+ AF::lit(to_string(strides.dim(1))),
+ AF::lit(to_string(strides.dim(0))),
+ AF::lit(to_string(padding_before.at(1))),
+ AF::lit(to_string(padding_after.at(1))),
+ AF::lit(to_string(padding_before.at(0))),
+ AF::lit(to_string(padding_after.at(0))),
+ AF::lit("arm_compute::DimensionRoundingType::FLOOR")};
auto pad_stride_info_var = block->var(type_name, var_name, {}, var_init_params);
// themselves,
// so we don't serialize them here, also we don't serialize tensors from dangling ConstantOp
static std::map<Operation::Type, std::size_t> self_serializing_ops_to_inputs{
- {Operation::Type::conv2D, 1}, {Operation::Type::fullyConnected, 1}};
+ {Operation::Type::conv2D, 1}, {Operation::Type::fullyConnected, 1}};
for (Operation::Use use : op.getOutput(0)->getUses())
{
for (int i = 0; i < ir_input->getShape().rank(); ++i)
{
auto pad_var = _constrBlock->var(
- "arm_compute::PaddingInfo", prefix + "_pad_" + to_string(i), {},
- {AF::lit(to_string(padding_before[i])), AF::lit(to_string(padding_after[i]))});
+ "arm_compute::PaddingInfo", prefix + "_pad_" + to_string(i), {},
+ {AF::lit(to_string(padding_before[i])), AF::lit(to_string(padding_after[i]))});
auto pad = pad_var->use();
_constrBlock->call("push_back", {pad}, pad_list);
}
// FIXME Set up the `constant_value` parameter.
assert(op.getPaddingValue() == 0.0f);
auto layer =
- genLayer("arm_compute::CLPadLayer", prefix, {AF::ref(input), AF::ref(out), pad_list});
+ genLayer("arm_compute::CLPadLayer", prefix, {AF::ref(input), AF::ref(out), pad_list});
genLayerExecution(layer);
}
// Transpose data from MIR format to a format compatible with ACL
const string transposed_input_name = output_tensor_name + "transposed_input";
shared_ptr<ArtifactId> transposed_input =
- genTransposeMIRtoACL(transposed_input_name, ir_input->getShape(), in_id);
+ genTransposeMIRtoACL(transposed_input_name, ir_input->getShape(), in_id);
const string layer_name = output_tensor_name + "_pooling_layer";
// Create kernel window info
shared_ptr<ArtifactVariable> kernel_window_var = _constrBlock->var(
- "arm_compute::Size2D", layer_name + "_kernel_window", {},
- {AF::lit(to_string(op.getWindowSize()[1])), AF::lit(to_string(op.getWindowSize()[0]))});
+ "arm_compute::Size2D", layer_name + "_kernel_window", {},
+ {AF::lit(to_string(op.getWindowSize()[1])), AF::lit(to_string(op.getWindowSize()[0]))});
shared_ptr<ArtifactId> kernel_window = kernel_window_var->use();
// Create pooling info: pooling type, kernel info, strides, etc
shared_ptr<ArtifactVariable> pooling_info_var =
- _constrBlock->var("arm_compute::PoolingLayerInfo", layer_name + "_pooling_info", {},
- {AF::lit(pooling_type), kernel_window, pad_stride_info,
- AF::lit(exclude_padding ? "true" : "false")});
+ _constrBlock->var("arm_compute::PoolingLayerInfo", layer_name + "_pooling_info", {},
+ {AF::lit(pooling_type), kernel_window, pad_stride_info,
+ AF::lit(exclude_padding ? "true" : "false")});
shared_ptr<ArtifactId> pooling_info = pooling_info_var->use();
// Generate auxiliary tensor to hold transposed output of pool in NCHW format
Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output->getShape());
shared_ptr<ArtifactId> transposed_output =
- genTensor(layer_name + "_out_transpose", transposed_output_shape);
+ genTensor(layer_name + "_out_transpose", transposed_output_shape);
// Actual layer creation
shared_ptr<ArtifactId> layer =
- genLayer("arm_compute::CLPoolingLayer", layer_name,
- {AF::ref(transposed_input), AF::ref(transposed_output), pooling_info});
+ genLayer("arm_compute::CLPoolingLayer", layer_name,
+ {AF::ref(transposed_input), AF::ref(transposed_output), pooling_info});
genTensorAllocation(_infBlock, transposed_output);
genLayerExecution(layer);
shared_ptr<ArtifactId> output =
- genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
+ genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
genTensorDeallocation(_infBlock, transposed_input);
genTensorDeallocation(_infBlock, transposed_output);
// Generate auxiliary tensor to hold transposed input of convolution in NCHW format
shared_ptr<ArtifactId> transposed_input =
- genTransposeMIRtoACL(output_tensor_name + "_transposed_input", ir_input->getShape(), input);
+ genTransposeMIRtoACL(output_tensor_name + "_transposed_input", ir_input->getShape(), input);
// Create the transposed output tensor in the DOM.
const string transposed_output_name = output_tensor_name + "_transposed_output";
Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output->getShape());
shared_ptr<ArtifactId> transposed_output =
- genTensor(transposed_output_name, transposed_output_shape);
+ genTensor(transposed_output_name, transposed_output_shape);
string operation_name = output_tensor_name + suffix;
// Generate auxiliary tensor to hold transposed output of convolution in NHWC format
shared_ptr<ArtifactId> output =
- genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
+ genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
genTensorDeallocation(_infBlock, transposed_input);
genTensorDeallocation(_infBlock, transposed_output);
// constructor. This instance provides information about the concrete activation function,
// like ReLU, Tanh, etc., and two optional parameters (alpha and beta) needed by some activations.
auto activation_info_var = _constrBlock->var(
- "arm_compute::ActivationLayerInfo", prefix + "_activation_info", {},
- {AF::lit("arm_compute::ActivationLayerInfo::ActivationFunction::" + activation_name),
- AF::lit(to_string(a)), AF::lit(to_string(b))});
+ "arm_compute::ActivationLayerInfo", prefix + "_activation_info", {},
+ {AF::lit("arm_compute::ActivationLayerInfo::ActivationFunction::" + activation_name),
+ AF::lit(to_string(a)), AF::lit(to_string(b))});
auto activation_info = activation_info_var->use();
// Create an instance of the CLActivationLayer class as a member of the artifact class.
auto arithmetic_add_layer = arithmetic_add_layer_var->use();
// Generate the call: arithmetic_add_layer.configure(&in1, &in2, &out);
- _constrBlock->call("configure", {AF::ref(in1), AF::ref(in2), AF::ref(out),
- AF::lit("arm_compute::ConvertPolicy::WRAP")},
- arithmetic_add_layer);
+ _constrBlock->call(
+ "configure",
+ {AF::ref(in1), AF::ref(in2), AF::ref(out), AF::lit("arm_compute::ConvertPolicy::WRAP")},
+ arithmetic_add_layer);
// Generate the call: arithmetic_add_layer.run();
_infBlock->call("run", {}, arithmetic_add_layer);
if (!tensor_name.empty())
{
tensor_name = "_" + tensor_name;
- replace_if(tensor_name.begin(), tensor_name.end(), [](char c) { return std::isalnum(c) == 0; },
- '_');
+ replace_if(
+ tensor_name.begin(), tensor_name.end(), [](char c) { return std::isalnum(c) == 0; }, '_');
}
else
{
const char *type_name = "arm_compute::TensorShape";
shared_ptr<ArtifactId> shape =
- genVectorInitializedVar(_constrBlock, type_name, name + "_shape", shape_vectorized);
+ genVectorInitializedVar(_constrBlock, type_name, name + "_shape", shape_vectorized);
_constrBlock->call("initializeTensor", {id, shape});
if (gen_accessor)
// Create operation parameter containing permutation vector
shared_ptr<ArtifactId> perm_vector = genVectorInitializedVar(
- _constrBlock, "arm_compute::PermutationVector", out_name + "_perm_param", acl_perm);
+ _constrBlock, "arm_compute::PermutationVector", out_name + "_perm_param", acl_perm);
// Instantiate the CLPermute object.
string layer_name = out_name + "_transpose_layer";
ArtifactFunctionCall::ArtifactFunctionCall(string func_name,
list<shared_ptr<ArtifactExpr>> param_list,
shared_ptr<ArtifactExpr> on, ArtifactCallType call_type)
- : _funcName(std::move(func_name)), _callType(call_type), _on(std::move(on)),
- _paramList(std::move(param_list))
+ : _funcName(std::move(func_name)), _callType(call_type), _on(std::move(on)),
+ _paramList(std::move(param_list))
{
}
{
public:
ArtifactUnaryExpr(ArtifactUnOp op, std::shared_ptr<ArtifactExpr> expr)
- : _op(op), _expr(std::move(expr))
+ : _op(op), _expr(std::move(expr))
{
}
public:
ArtifactBinaryExpr(ArtifactBinOp op, std::shared_ptr<ArtifactExpr> left,
std::shared_ptr<ArtifactExpr> right)
- : _op(op), _left(std::move(left)), _right(std::move(right))
+ : _op(op), _left(std::move(left)), _right(std::move(right))
{
}
{
public:
ArtifactIndex(std::shared_ptr<ArtifactExpr> expr, std::shared_ptr<ArtifactExpr> ind)
- : _expr(std::move(expr)), _ind(std::move(ind))
+ : _expr(std::move(expr)), _ind(std::move(ind))
{
}
ArtifactVariable(std::string type_name, std::string var_name,
std::list<std::shared_ptr<ArtifactExpr>> dimensions = {},
std::list<std::shared_ptr<ArtifactExpr>> initializers = {})
- : _typeName(std::move(type_name)), _dimensions(std::move(dimensions)),
- _initializers(std::move(initializers)), ArtifactNamed(std::move(var_name))
+ : _typeName(std::move(type_name)), _dimensions(std::move(dimensions)),
+ _initializers(std::move(initializers)), ArtifactNamed(std::move(var_name))
{
}
explicit ArtifactForLoop(std::shared_ptr<ArtifactVariable> init = nullptr,
std::shared_ptr<ArtifactExpr> cond = nullptr,
std::shared_ptr<ArtifactExpr> iter = nullptr)
- : _init(std::move(init)), _cond(std::move(cond)), _iter(std::move(iter))
+ : _init(std::move(init)), _cond(std::move(cond)), _iter(std::move(iter))
{
}
*/
ArtifactFunction(std::string ret_type_name, const std::string &func_name,
std::list<std::shared_ptr<ArtifactVariable>> params = {})
- : ArtifactNamed(func_name), _params(std::move(params)), _retTypeName(std::move(ret_type_name))
+ : ArtifactNamed(func_name), _params(std::move(params)), _retTypeName(std::move(ret_type_name))
{
}
const std::string &var_name,
const std::list<std::shared_ptr<ArtifactExpr>> &dimensions = {},
const std::list<std::shared_ptr<ArtifactExpr>> &initializers = {})
- : ArtifactClassMember(owner), ArtifactVariable(type_name, var_name, dimensions, initializers)
+ : ArtifactClassMember(owner), ArtifactVariable(type_name, var_name, dimensions, initializers)
{
}
ArtifactClassFunction(const ArtifactClass *owner, const std::string &ret_type_name,
const std::string &func_name,
const std::list<std::shared_ptr<ArtifactVariable>> &params = {})
- : ArtifactClassMember(owner), ArtifactFunction(ret_type_name, func_name, params)
+ : ArtifactClassMember(owner), ArtifactFunction(ret_type_name, func_name, params)
{
}
static TensorVariant readTensorFromFile(const std::string &filename, const TensorType &type)
{
const std::size_t input_data_size =
- type.getShape().numElements() * getDataTypeSize(type.getElementType());
+ type.getShape().numElements() * getDataTypeSize(type.getElementType());
std::ifstream stream(filename, std::ios::in | std::ios::binary);
if (stream.fail())
int64_t file_size = end - begin;
if (static_cast<std::size_t>(file_size) != input_data_size)
- throw std::runtime_error("File \"" + filename + "\" has incorrect size: " +
- std::to_string(file_size) + "(expected: " +
- std::to_string(input_data_size) + ").");
+ throw std::runtime_error("File \"" + filename +
+ "\" has incorrect size: " + std::to_string(file_size) +
+ "(expected: " + std::to_string(input_data_size) + ").");
std::unique_ptr<char[]> data(new char[input_data_size]);
stream.read(data.get(), input_data_size);
}
InterpreterBackend::InterpreterBackend(std::string input_dir, std::string output_dir)
- : _input_dir(std::move(input_dir)), _output_dir(std::move(output_dir))
+ : _input_dir(std::move(input_dir)), _output_dir(std::move(output_dir))
{
}
}
CPPCodeGenerator::CPPCodeGenerator(std::string output_dir, std::string artifact_name)
- : _output_dir(std::move(output_dir)), _artifact_name(std::move(artifact_name))
+ : _output_dir(std::move(output_dir)), _artifact_name(std::move(artifact_name))
{
}
string class_name = ma.getModelName() + "Model";
out.write(cpp_header_types, sizeof(cpp_header_types));
- out << "class " << class_name << "\n"
- "{\n"
- "public:\n"
- " "
- << class_name << "(const std::string& parametersPath);\n"
- " ~"
+ out << "class " << class_name
+ << "\n"
+ "{\n"
+ "public:\n"
+ " "
+ << class_name
+ << "(const std::string& parametersPath);\n"
+ " ~"
<< class_name << "();\n";
// generate input setters
if (ma.getInputs().size() == 1)
out << " void doInference();\n\n"
"private:\n"
" "
- << class_name << "() = delete;\n"
- " "
- << class_name << "(const " << class_name << "& orig) = delete;\n"
- " "
+ << class_name
+ << "() = delete;\n"
+ " "
+ << class_name << "(const " << class_name
+ << "& orig) = delete;\n"
+ " "
<< class_name << "& operator=(const " << class_name << "& orig) = delete;\n";
// generate input/output tensors
for (const size_t in_tensor_id : ma.getInputs())
{
const string &var_name = _formattedTensors[td.id];
- out << "bool " << class_name << "::set" << setter_name << "(const Tensor& t)\n"
- "{\n";
+ out << "bool " << class_name << "::set" << setter_name
+ << "(const Tensor& t)\n"
+ "{\n";
// need to insert input correctness check
const mir::Shape expected = td.shape;
int rank = expected.rank();
out << " "
<< "if (t.getShape()[" << i << "] != " << expected.dim(i) << ") return false;\n";
}
- out << " " << var_name << " = t;\n"
- " return true;\n"
- "}\n\n";
+ out << " " << var_name
+ << " = t;\n"
+ " return true;\n"
+ "}\n\n";
}
void CPPCodeGenerator::printGetter(ostream &out, const string &class_name,
{
const string &var_name = _formattedTensors[td.id];
- out << "shared_ptr<Tensor> " << class_name << "::get" << getter_name << "()\n"
- "{\n"
- " return "
- << var_name << ";\n"
- "}\n\n";
+ out << "shared_ptr<Tensor> " << class_name << "::get" << getter_name
+ << "()\n"
+ "{\n"
+ " return "
+ << var_name
+ << ";\n"
+ "}\n\n";
}
void CPPCodeGenerator::materializeCall(ostream &out, const ModelAnalyzer &ma,
<< "(const string& parametersPath)\n"
"{\n"
" readParameters(_parameters, _paramSize, parametersPath, "
- << s.getFormatVersion() << ", " << s.getModelHash() << ");\n"
- "}\n\n";
+ << s.getFormatVersion() << ", " << s.getModelHash()
+ << ");\n"
+ "}\n\n";
// gen NN destructor
- out << class_name << "::~" << class_name << "()\n"
- "{\n"
- " releaseParameters(_parameters, _paramSize);\n"
- "}\n\n";
+ out << class_name << "::~" << class_name
+ << "()\n"
+ "{\n"
+ " releaseParameters(_parameters, _paramSize);\n"
+ "}\n\n";
// generate input setters
// generate main setter if network has only one
const auto &inputs = ma.getInputs();
const TensorDescriptor &td = tensors[output_tensor_id];
printGetter(out, class_name, output_tensor_name, td);
}
- out << "void " << class_name << "::doInference()\n"
- "{\n";
+ out << "void " << class_name
+ << "::doInference()\n"
+ "{\n";
for (size_t output_tensor_id : ma.getPersistentTensors())
{
const string &output_tensor_name = _formattedTensors[output_tensor_id];
{
const auto &tensor_name = output.getName();
const auto tensor_id =
- tensor_name.empty() ? declareTemporaryTensor() : declarePersistentTensor(tensor_name);
+ tensor_name.empty() ? declareTemporaryTensor() : declarePersistentTensor(tensor_name);
node_output_tensors.push_back(tensor_id);
}
}
std::copy(aux_args.begin(), aux_args.end(), std::back_inserter(node_input_tensors));
unique_ptr<Action> operation_call(new CallFunction(
- op, function_name, std::move(node_input_tensors), std::move(node_output_tensors)));
+ op, function_name, std::move(node_input_tensors), std::move(node_output_tensors)));
_inferenceSequence.push_back(std::move(operation_call));
_opToDescr[op] = _inferenceSequence.back().get();
}
{
public:
/**
- * @brief contructs inference sequence
- * @param g pointer to graph to linearize
- */
+   * @brief constructs inference sequence
+ * @param g pointer to graph to linearize
+ */
void analyze(const mir::Graph *g);
void visit(mir::ops::AbsOp &) override;
{
TransposeTensor(size_t input, size_t output, std::vector<int32_t> &&perm)
- : Action(Type::transposeTensor), perm(std::move(perm)), input(input), output(output)
+ : Action(Type::transposeTensor), perm(std::move(perm)), input(input), output(output)
{
}
CallFunction(mir::Operation *op, std::string func_name, std::vector<size_t> &&inputs,
std::vector<size_t> &&outputs)
- : Action(Type::callFunction), mirOp(op), funcName(std::move(func_name)), inputs(inputs),
- outputs(outputs), paramStartOffset(0)
+ : Action(Type::callFunction), mirOp(op), funcName(std::move(func_name)), inputs(inputs),
+ outputs(outputs), paramStartOffset(0)
{
}
#else
showopt(false)
#endif // NNC_FRONTEND_CAFFE_ENABLED
- );
+);
Option<bool> onnxFrontend(optname("--onnx"), overview("treat input file as ONNX model"), false,
optional(true), optvalues(""), nullptr, separators(""),
#ifdef NNC_FRONTEND_ONNX_ENABLED
#else
showopt(false)
#endif // NNC_FRONTEND_ONNX_ENABLED
- );
+);
Option<bool> caffe2Frontend(optname("--caffe2"),
overview("treat input file as Caffe2 model (predict_net.pb)"), false,
#else
showopt(false)
#endif // NNC_FRONTEND_TFLITE_ENABLED
- );
+);
Option<std::string>
- target(optname("--target"),
- overview("select target language to emit for given architecture."
- "Valid values are '" NNC_TARGET_ARM_CPP "', '" NNC_TARGET_X86_CPP
- "', '" NNC_TARGET_ARM_GPU_CPP "', '" NNC_TARGET_INTERPRETER "'"),
- std::string(), optional(false),
- optvalues(NNC_TARGET_ARM_CPP "," NNC_TARGET_X86_CPP "," NNC_TARGET_ARM_GPU_CPP
- "," NNC_TARGET_INTERPRETER),
- nullptr, separators("="));
+ target(optname("--target"),
+ overview("select target language to emit for given architecture."
+ "Valid values are '" NNC_TARGET_ARM_CPP "', '" NNC_TARGET_X86_CPP
+ "', '" NNC_TARGET_ARM_GPU_CPP "', '" NNC_TARGET_INTERPRETER "'"),
+ std::string(), optional(false),
+ optvalues(NNC_TARGET_ARM_CPP "," NNC_TARGET_X86_CPP "," NNC_TARGET_ARM_GPU_CPP
+ "," NNC_TARGET_INTERPRETER),
+ nullptr, separators("="));
/**
* Options for *frontend*
{
public:
/* implicit */ PassData(std::nullptr_t data)
- : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
- _dataContainer{.unknown = data},
- _dataType(PDT::UNKNOWN)
+ : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
+ _dataContainer{.unknown = data}, _dataType(PDT::UNKNOWN)
{
}
* @brief Implicit conversion from Graph* to PassData
*/
/* implicit */ PassData(mir::Graph *graph)
- : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
- _dataContainer{.graph = graph},
- _dataType(PDT::GRAPH)
+ : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
+ _dataContainer{.graph = graph}, _dataType(PDT::GRAPH)
{
}
* @brief Implicit conversion from Graph* to PassData
*/
/* implicit */ PassData(mir::TensorVariant *tv)
- : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
- _dataContainer{.tensorVariant = tv},
- _dataType(PDT::TENSOR_VARIANT)
+ : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
+ _dataContainer{.tensorVariant = tv}, _dataType(PDT::TENSOR_VARIANT)
{
}
PassData run(PassData data) override;
std::string getName() override { return "opt_combine_transposes"; };
+
private:
};
namespace opt_util
{
/**
-* @brief Swap adjacent nodes in Graph. Creates new nodes and replaces the old ones with new.
-* @param g MIR Graph
-* @param top Node
-* @param bottom Node
-*/
+ * @brief Swap adjacent nodes in Graph. Creates new nodes and replaces the old ones with new.
+ * @param g MIR Graph
+ * @param top Node
+ * @param bottom Node
+ */
void swapAdjacent(mir::Graph *g, mir::Operation *top, mir::Operation *bottom);
// TODO: this function and its usages should be removed after DCE optimization is implemented
{
public:
explicit BadOption(const std::string &msg, std::string optname = "", std::string value = "")
- : std::logic_error(msg), _option_name(std::move(optname)), _option_value(std::move(value))
+ : std::logic_error(msg), _option_name(std::move(optname)), _option_value(std::move(value))
{
}
std::map<std::string, IOption *> _options_name; // map of name -> option
std::vector<IOption *> _options; // options
std::map<IOption::Group, std::vector<IOption *>>
- _grouped_options; // map of groups: group -> vector of options
+ _grouped_options; // map of groups: group -> vector of options
std::string _prog_name; // name of program
int _args_num = 0; // number of command line arguments
};
_group = group;
_can_have_several_vals =
- std::is_same<T, std::vector<std::string>>::value || std::is_same<T, std::vector<int>>::value;
+ std::is_same<T, std::vector<std::string>>::value || std::is_same<T, std::vector<int>>::value;
assert(!(_can_have_several_vals && !_seps.empty()) &&
"option with several values can't have separators");
};
auto *bottom_transpose = dynamic_cast<mir::ops::TransposeOp *>(match.second);
auto combined_axis_order =
- combineAxisOrders(top_transpose->getAxisOrder(), bottom_transpose->getAxisOrder());
+ combineAxisOrders(top_transpose->getAxisOrder(), bottom_transpose->getAxisOrder());
if (!isIdentityTranspose(combined_axis_order))
{
auto new_tr_op =
- g->create<mir::ops::TransposeOp>(top_transpose->getInput(0), combined_axis_order);
+ g->create<mir::ops::TransposeOp>(top_transpose->getInput(0), combined_axis_order);
g->replaceNode(bottom_transpose, new_tr_op);
}
return;
bool has_no_uses =
- std::all_of(op->getOutputs().cbegin(), op->getOutputs().cend(),
- [](const Operation::Output &output) { return output.getUses().empty(); });
+ std::all_of(op->getOutputs().cbegin(), op->getOutputs().cend(),
+ [](const Operation::Output &output) { return output.getUses().empty(); });
if (has_no_uses)
{
// Create new operations
auto old_add_input = old_add_op->getInput(0);
auto new_mul_op =
- g->copyOpWithInputs(old_mul_op, {old_add_input, ols_mul_const_op->getOutput(0)});
+ g->copyOpWithInputs(old_mul_op, {old_add_input, ols_mul_const_op->getOutput(0)});
auto new_add_const_op = mergeConstantOps(g, old_add_const_op, ols_mul_const_op, OpType::mul);
auto new_add_op =
- g->copyOpWithInputs(old_add_op, {new_mul_op->getOutput(0), new_add_const_op->getOutput(0)});
+ g->copyOpWithInputs(old_add_op, {new_mul_op->getOutput(0), new_add_const_op->getOutput(0)});
// Replace old mul with new add and remove old nodes
g->replaceNode(old_mul_op, new_add_op);
namespace nnc
{
DataFormatSwitcher::DataFormatSwitcher(const mir::DataFormat target_format)
- : _target_format(target_format)
+ : _target_format(target_format)
{
}
mir::Operation::Output *new_out;
if (_target_format == mir::DataFormat::NHWC)
new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 2, 3, 1})
- ->getOutput(0); // NCHW -> NHWC
+ ->getOutput(0); // NCHW -> NHWC
else
new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 3, 1, 2})
- ->getOutput(0); // NHWC -> NCHW
+ ->getOutput(0); // NHWC -> NCHW
if (out->getType().isQuantized())
new_out->setQuantization(out->getType().getQuantization());
return new_out;
mir::Operation::Output *new_out;
if (_target_format == mir::DataFormat::NHWC)
new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 3, 1, 2})
- ->getOutput(0); // NHWC -> NCHW
+ ->getOutput(0); // NHWC -> NCHW
else
new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 2, 3, 1})
- ->getOutput(0); // NCHW -> NHWC
+ ->getOutput(0); // NCHW -> NHWC
if (out->getType().isQuantized())
new_out->setQuantization(out->getType().getQuantization());
return new_out;
// [O, H, W, I / M] == [M, H, W, 1] -> [H, W, M, 1]
std::vector<std::size_t> perm{1, 2, 0, 3};
mir::Operation::Output *new_kernel =
- graph->create<mir::ops::TransposeOp>(kernel, perm)->getOutput(0);
+ graph->create<mir::ops::TransposeOp>(kernel, perm)->getOutput(0);
mir::Conv2DOpAttributes attributes = op->getAttributes();
attributes.num_groups = 1;
mir::Operation::Output *new_result =
- graph->create<mir::ops::DepthwiseConv2DOp>(input, new_kernel, attributes)->getOutput(0);
+ graph->create<mir::ops::DepthwiseConv2DOp>(input, new_kernel, attributes)->getOutput(0);
graph->replaceNode(op, new_result->getNode());
}
}
// Copy the model input HDF5 file to the remote device.
ASSERT_TRUE(
- copyToOdroid(binDir + "/" + name + "/in_" + name + "_caffe.hdf5", dir_name + "/in.hdf5"));
+ copyToOdroid(binDir + "/" + name + "/in_" + name + "_caffe.hdf5", dir_name + "/in.hdf5"));
// Switch to the artifact directory on the remote device and run the artifact.
ASSERT_TRUE(runOnOdroid("cd " + dir_name + "; ./nnc_test"));
Iterator i(&tensor, window);
char *ptr = &buf[0];
- execute_window_loop(window,
- [&i, &ptr](const Coordinates &) {
- memcpy(ptr, i.ptr(), sizeof(float));
- ptr += sizeof(float);
- },
- i);
+ execute_window_loop(
+ window,
+ [&i, &ptr](const Coordinates &) {
+ memcpy(ptr, i.ptr(), sizeof(float));
+ ptr += sizeof(float);
+ },
+ i);
tensor.unmap();
return buf;
Iterator i(&tensor, window);
char *ptr = &buf[0];
- execute_window_loop(window,
- [&i, &ptr](const Coordinates &) {
- memcpy(i.ptr(), ptr, sizeof(float));
- ptr += sizeof(float);
- },
- i);
+ execute_window_loop(
+ window,
+ [&i, &ptr](const Coordinates &) {
+ memcpy(i.ptr(), ptr, sizeof(float));
+ ptr += sizeof(float);
+ },
+ i);
tensor.unmap();
}
string target_compiler = "g++ -Wall --std=c++11";
string compiler_command =
- target_compiler + " -I" + output_dir + " " + main_path + " " + code_path;
+ target_compiler + " -I" + output_dir + " " + main_path + " " + code_path;
// call compiler
int res = system(compiler_command.c_str());
const char *var_name = "id";
shared_ptr<ArtifactId> var = AF::id(var_name);
pair<ArtifactUnOp, const char *> test_cases[] = {
- {ArtifactUnOp::preIncr, "++id"}, {ArtifactUnOp::preDecr, "--id"},
- {ArtifactUnOp::heapNew, "new id"}, {ArtifactUnOp::heapFree, "delete id"},
- {ArtifactUnOp::postIncr, "id++"}, {ArtifactUnOp::postDecr, "id--"}};
+ {ArtifactUnOp::preIncr, "++id"}, {ArtifactUnOp::preDecr, "--id"},
+ {ArtifactUnOp::heapNew, "new id"}, {ArtifactUnOp::heapFree, "delete id"},
+ {ArtifactUnOp::postIncr, "id++"}, {ArtifactUnOp::postDecr, "id--"}};
for (auto test : test_cases)
{
shared_ptr<ArtifactId> op2 = AF::id(op2_name);
pair<ArtifactBinOp, const char *> test_cases[] = {
- {ArtifactBinOp::eq, "a == b"}, {ArtifactBinOp::notEq, "a != b"},
- {ArtifactBinOp::less, "a < b"}, {ArtifactBinOp::lessOrEq, "a <= b"},
- {ArtifactBinOp::great, "a > b"}, {ArtifactBinOp::greatOrEq, "a >= b"},
- {ArtifactBinOp::assign, "a = b"}, {ArtifactBinOp::plus, "a + b"},
- {ArtifactBinOp::minus, "a - b"}, {ArtifactBinOp::mult, "a * b"},
- {ArtifactBinOp::div, "a / b"}, {ArtifactBinOp::plusAssign, "a += b"},
- {ArtifactBinOp::minusAssign, "a -= b"}, {ArtifactBinOp::multAssign, "a *= b"},
- {ArtifactBinOp::divAssign, "a /= b"}};
+ {ArtifactBinOp::eq, "a == b"}, {ArtifactBinOp::notEq, "a != b"},
+ {ArtifactBinOp::less, "a < b"}, {ArtifactBinOp::lessOrEq, "a <= b"},
+ {ArtifactBinOp::great, "a > b"}, {ArtifactBinOp::greatOrEq, "a >= b"},
+ {ArtifactBinOp::assign, "a = b"}, {ArtifactBinOp::plus, "a + b"},
+ {ArtifactBinOp::minus, "a - b"}, {ArtifactBinOp::mult, "a * b"},
+ {ArtifactBinOp::div, "a / b"}, {ArtifactBinOp::plusAssign, "a += b"},
+ {ArtifactBinOp::minusAssign, "a -= b"}, {ArtifactBinOp::multAssign, "a *= b"},
+ {ArtifactBinOp::divAssign, "a /= b"}};
for (auto test : test_cases)
{
shared_ptr<ArtifactVariable> iter = AF::var(var_type, var_name, {}, {AF::lit("0")});
shared_ptr<ArtifactExpr> step =
- AF::bin(ArtifactBinOp::plusAssign, AF::id(var_name), AF::lit("1"));
+ AF::bin(ArtifactBinOp::plusAssign, AF::id(var_name), AF::lit("1"));
shared_ptr<ArtifactExpr> cond =
- AF::bin(ArtifactBinOp::lessOrEq, AF::id(var_name), AF::lit("123"));
+ AF::bin(ArtifactBinOp::lessOrEq, AF::id(var_name), AF::lit("123"));
shared_ptr<ArtifactBinaryExpr> expr =
- AF::bin(ArtifactBinOp::plusAssign, AF::id("hello"), AF::id("world"));
+ AF::bin(ArtifactBinOp::plusAssign, AF::id("hello"), AF::id("world"));
ArtifactForLoop loop(iter, cond, step);
const char *var_name = "i";
shared_ptr<ArtifactExpr> cond =
- AF::bin(ArtifactBinOp::lessOrEq, AF::id(var_name), AF::lit("123"));
+ AF::bin(ArtifactBinOp::lessOrEq, AF::id(var_name), AF::lit("123"));
shared_ptr<ArtifactBinaryExpr> expr =
- AF::bin(ArtifactBinOp::plusAssign, AF::id("hello"), AF::id("world"));
+ AF::bin(ArtifactBinOp::plusAssign, AF::id("hello"), AF::id("world"));
ArtifactIf if_stmt(cond);
list<shared_ptr<ArtifactExpr>> dims{dim1, dim2};
list<shared_ptr<ArtifactExpr>> initializers{AF::lit("123")};
shared_ptr<ArtifactClassVariable> var_decl =
- cls.var(is_public, var_type, var_name, dims, initializers);
+ cls.var(is_public, var_type, var_name, dims, initializers);
return var_decl;
}
const char *code_prefix = "#include \"module.h\"\n\n#include <list>\n\n#include \"bar.h\"\n\n";
const char *code_suffix = "\nClass::Class() {\n}\n\n";
- string ref_data = string(code_prefix) +
- string(AclArtifactUtilities, sizeof(AclArtifactUtilities)) + code_suffix;
+ string ref_data =
+ string(code_prefix) + string(AclArtifactUtilities, sizeof(AclArtifactUtilities)) + code_suffix;
m.accept(&code_gen);
ASSERT_EQ(code_out.str(), ref_data);
// check ordinary includes, like '#include "artifact_data.h"'
checkHeadersSetsEqual(
- m.headerIncludes(),
- {"arm_compute/core/Types.h", "arm_compute/runtime/BlobLifetimeManager.h",
- "arm_compute/runtime/CL/CLBufferAllocator.h", "arm_compute/runtime/CL/CLFunctions.h",
- "arm_compute/runtime/CL/CLScheduler.h", "arm_compute/runtime/MemoryManagerOnDemand.h",
- "arm_compute/runtime/PoolManager.h"},
- "system header includes diverged");
+ m.headerIncludes(),
+ {"arm_compute/core/Types.h", "arm_compute/runtime/BlobLifetimeManager.h",
+ "arm_compute/runtime/CL/CLBufferAllocator.h", "arm_compute/runtime/CL/CLFunctions.h",
+ "arm_compute/runtime/CL/CLScheduler.h", "arm_compute/runtime/MemoryManagerOnDemand.h",
+ "arm_compute/runtime/PoolManager.h"},
+ "system header includes diverged");
checkHeadersSetsEqual(m.sourceSysIncludes(), {}, "system source includes diverged");
}
Graph g;
OpConstructor op_generator =
- [kernel_tensor](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
- auto kernel = g.create<mir::ops::ConstantOp>(kernel_tensor)->getOutput(0);
- return g.create<mir::ops::Conv2DOp>(inputs[0], kernel, mir::Conv2DOpAttributes());
- };
+ [kernel_tensor](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ auto kernel = g.create<mir::ops::ConstantOp>(kernel_tensor)->getOutput(0);
+ return g.create<mir::ops::Conv2DOp>(inputs[0], kernel, mir::Conv2DOpAttributes());
+ };
vector<Shape> input_shapes{{1, 10, 10, channels}};
Graph g;
OpConstructor op_generator =
- [kernel_tensor](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
- Conv2DOpAttributes attributes;
- auto kernel = g.create<mir::ops::ConstantOp>(kernel_tensor)->getOutput(0);
- return g.create<mir::ops::DepthwiseConv2DOp>(inputs[0], kernel, attributes);
- };
+ [kernel_tensor](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ Conv2DOpAttributes attributes;
+ auto kernel = g.create<mir::ops::ConstantOp>(kernel_tensor)->getOutput(0);
+ return g.create<mir::ops::DepthwiseConv2DOp>(inputs[0], kernel, attributes);
+ };
vector<Shape> input_shapes{{1, 10, 10, channels}};
Operation *tr1 = g.create<ops::TransposeOp>(in1->getOutput(0), vector<size_t>{0, 3, 1, 2});
Operation *tr2 = g.create<ops::TransposeOp>(in2->getOutput(0), vector<size_t>{0, 3, 1, 2});
Operation *conc =
- g.create<ops::ConcatOp>(vector<Operation::Output *>{tr1->getOutput(0), tr2->getOutput(0)}, 1);
+ g.create<ops::ConcatOp>(vector<Operation::Output *>{tr1->getOutput(0), tr2->getOutput(0)}, 1);
Operation *tanh = g.create<ops::TanhOp>(conc->getOutput(0));
Operation *out = g.create<ops::OutputOp>(tanh->getOutput(0));
(void)out;
Operation *relu1 = g.create<ops::ReluOp>(in1->getOutput(0));
Operation *relu2 = g.create<ops::ReluOp>(in2->getOutput(0));
Operation *conc = g.create<ops::ConcatOp>(
- vector<Operation::Output *>{relu1->getOutput(0), relu2->getOutput(0)}, 1);
+ vector<Operation::Output *>{relu1->getOutput(0), relu2->getOutput(0)}, 1);
Operation *tanh = g.create<ops::TanhOp>(conc->getOutput(0));
Operation *out = g.create<ops::OutputOp>(tanh->getOutput(0));
(void)out;
* @brief Creates graph with one operation generated by opGen function and returns this operation
* node
*/
-mir::Operation *
-fillGraph(mir::Graph &g,
- const function<mir::Operation *(mir::Graph &g, vector<mir::Operation::Output *> &inputs)>
- &op_gen,
- const vector<unique_ptr<mir::TensorVariant>> &input_ntensors)
+mir::Operation *fillGraph(
+ mir::Graph &g,
+ const function<mir::Operation *(mir::Graph &g, vector<mir::Operation::Output *> &inputs)> &op_gen,
+ const vector<unique_ptr<mir::TensorVariant>> &input_ntensors)
{
// Create operation inputs.
vector<mir::Operation::Output *> inputs;
float ref_data = mir::Tensor<float>(ref_nnc_tensor).at(nnc_idx);
float test_data = test_art_tensor.at(artifact_idx);
ASSERT_TRUE(areFloatsNear(ref_data, test_data, 32, 1e-5))
- << "Tensor element " << nnc_idx << " diverged, reference: " << ref_data
- << " test result: " << test_data;
+ << "Tensor element " << nnc_idx << " diverged, reference: " << ref_data
+ << " test result: " << test_data;
}
}
*/
template <typename TestFunc, typename... Args>
void createAndRunTestGraph(
- function<mir::Operation *(mir::Graph &, const std::vector<mir::Operation::Output *> &inputs)>
- op_generator,
- TestFunc artifactOperation, const vector<unique_ptr<mir::TensorVariant>> &input_ntensors,
- Args &... input_atensors)
+ function<mir::Operation *(mir::Graph &, const std::vector<mir::Operation::Output *> &inputs)>
+ op_generator,
+ TestFunc artifactOperation, const vector<unique_ptr<mir::TensorVariant>> &input_ntensors,
+ Args &... input_atensors)
{
mir::Graph g;
mir::Operation *actual_operation = fillGraph(g, op_generator, input_ntensors);
auto op_generator = [&res_shape](mir::Graph &g,
const std::vector<mir::Operation::Output *> &inputs) {
return g.create<mir::ops::ResizeOp>(
- inputs[0], mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, res_shape);
+ inputs[0], mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, res_shape);
};
createAndRunTestGraph(op_generator, resize, input_ntensors, input_atensor);
{
cout << "\n";
std::vector<float> test_scales[] = {
- {1, 2, 2, 1}, {1, 2, 3, 1}, {1, 3, 2, 1}, {1, 2.5, 2, 1}, {1, 3, 9, 1}};
+ {1, 2, 2, 1}, {1, 2, 3, 1}, {1, 3, 2, 1}, {1, 2.5, 2, 1}, {1, 3, 9, 1}};
for (const std::vector<float> &scales : test_scales)
{
vector<int> input_shape_data{1, 4, 4, 1};
auto op_generator = [&scales](mir::Graph &g,
const std::vector<mir::Operation::Output *> &inputs) {
return g.create<mir::ops::ResizeOp>(
- inputs[0], mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales);
+ inputs[0], mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales);
};
createAndRunTestGraph(op_generator, resize, input_ntensors, input_atensor);
}
for (const auto include_pad : {false, true})
{
attributes.include_pad = include_pad;
- auto op_generator = [&attributes](
- mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
- return g.create<mir::ops::AvgPool2DOp>(inputs[0], attributes);
- };
+ auto op_generator =
+ [&attributes](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::AvgPool2DOp>(inputs[0], attributes);
+ };
createAndRunTestGraph(op_generator, avgPool, input_ntensors, input_atensor);
}
vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
fillTensors(input_ntensors[0], input_atensor, shape_data, 1.0f);
- auto op_generator = [&window_size, &strides](
- mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ auto op_generator = [&window_size,
+ &strides](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
mir::MaxPool2DOpAttributes attributes;
attributes.window = window_size;
attributes.strides = strides;
vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
fillTensors(input_ntensors[0], input_atensor, input_shape_data, 1.0f);
auto op_generator = [&axis_list, keep_dims](
- mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
auto op = g.create<mir::ops::ReduceMeanOp>(inputs[0], axis_list, keep_dims);
return op;
};
vector<int> shape_data{5, 30, 40, 12};
vector<int> starts[] = {{0, 0, 0, 0}, {1, 1, 1, 1}, {1, 0, 1, 0}, {0, 1, 1, 0}};
vector<int> sizes[] = {
- {-1, -1, -1, -1}, {4, -1, 10, -1},
+ {-1, -1, -1, -1},
+ {4, -1, 10, -1},
};
for (auto st : starts)
{
// test option with default negative value
Option<int32_t>
- NDefaultNegOpt(optname("-default_neg_val"),
- overview("description of integer option with default negative value"), -33);
+ NDefaultNegOpt(optname("-default_neg_val"),
+ overview("description of integer option with default negative value"), -33);
// test option with positive values
Option<uint32_t> NPosOpt(optname("-pos_val"),
overview("description of integer option with positive value"), 1,
{
// create command line
const char *argv[] = {
- "CLTest", // program name
- // string options
- "-m", "multiopt_value", // second name for option with several names
- "--single", "single_value", // option with single name
- "-several_separators:SOME_VALUE1,SOME_VALUE2", // test option with several separators
- "--one_separarot=AAA_VALUE", // test option whit one separator
- "-default_val_opt", // test option with default value
- "--optional_opt", "/home/guest/tmp", // test optional option
- "-valid_opt", "value2", // test options with defined values
- // integer options
- "-neg_val", "-42", // test negative value for integer option
- "-default_neg_val", // test integer option with default value
- "-pos_val", "33", // test positive value for integer option
- // char options
- "-char-opt", "b", "-dash_opt", "-",
- // bool options
- "-bool_opt=false", "-bool-opt2",
- // vector of strings options
- "-vec_opt1", "1", "c", "222", "ABC", "857", "-vec_opt2", "--vec_opt_with_vals", "abc", "123",
- "xxx", "abc", "xxx",
- // grouped options
- "-group_opt1", "-group_opt2", "abc", "-group_opt3", "11", nullptr};
+ "CLTest", // program name
+ // string options
+ "-m", "multiopt_value", // second name for option with several names
+ "--single", "single_value", // option with single name
+ "-several_separators:SOME_VALUE1,SOME_VALUE2", // test option with several separators
+    "--one_separarot=AAA_VALUE",                   // test option with one separator
+ "-default_val_opt", // test option with default value
+ "--optional_opt", "/home/guest/tmp", // test optional option
+ "-valid_opt", "value2", // test options with defined values
+ // integer options
+ "-neg_val", "-42", // test negative value for integer option
+ "-default_neg_val", // test integer option with default value
+ "-pos_val", "33", // test positive value for integer option
+ // char options
+ "-char-opt", "b", "-dash_opt", "-",
+ // bool options
+ "-bool_opt=false", "-bool-opt2",
+ // vector of strings options
+ "-vec_opt1", "1", "c", "222", "ABC", "857", "-vec_opt2", "--vec_opt_with_vals", "abc", "123",
+ "xxx", "abc", "xxx",
+ // grouped options
+ "-group_opt1", "-group_opt2", "abc", "-group_opt3", "11", nullptr};
int argc = (sizeof(argv) / sizeof(argv[0])) - 1;
// It must be failed if option is not passed and other options are in the same group
attributes.padding_before = {67, 123};
attributes.padding_after = {32, 356};
auto *dw_conv =
- g.create<mir::ops::DepthwiseConv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
+ g.create<mir::ops::DepthwiseConv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
auto *output = g.create<mir::ops::OutputOp>(dw_conv->getOutput(0));
attributes.padding_before = {31, 72};
attributes.padding_after = {32, 71};
auto *deconv =
- g.create<mir::ops::DeConv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
+ g.create<mir::ops::DeConv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
auto *output = g.create<mir::ops::OutputOp>(deconv->getOutput(0));
add_library(nnkit_caffe_backend SHARED Module.cpp)
target_link_libraries(nnkit_caffe_backend nnkit_support_caffe)
-target_link_libraries(nnkit_caffe_backend stdex)
#include "nnkit/support/caffe/Backend.h"
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
- using stdex::make_unique;
+ using std::make_unique;
auto net = make_unique<::caffe::Net<float>>(args.at(0), caffe::TEST);
const nncc::core::ADT::tensor::Reader<T> &)>;
template <typename T>
- using TypedAccessor = std::function<void(const TensorContext &, uint32_t n,
- nncc::core::ADT::tensor::Accessor<T> &)>;
+ using TypedAccessor =
+ std::function<void(const TensorContext &, uint32_t n, nncc::core::ADT::tensor::Accessor<T> &)>;
virtual ~TensorContext() = default;
target_include_directories(nnkit_support_backend PUBLIC include)
target_link_libraries(nnkit_support_backend PUBLIC nnkit_intf_backend)
target_link_libraries(nnkit_support_backend PUBLIC dl)
-target_link_libraries(nnkit_support_backend PUBLIC stdex)
find_package(Threads QUIET)
#include "nnkit/BackendPlugin.h"
#include <cassert>
-#include <stdex/Memory.h>
+#include <memory>
#include <iostream>
// NOTE dlfcn.h is not a standard library
exit(1);
}
- return stdex::make_unique<BackendPlugin>(handle, entry);
+ return std::make_unique<BackendPlugin>(handle, entry);
}
} // namespace nnkit
#include "nnkit/support/moco/tf/Backend.h"
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
- using stdex::make_unique;
+ using std::make_unique;
assert(args.size() == 2); // args.at[0] : *.pb path, args.at[1]: *.info path
add_library(nnkit_moco_tf_backend SHARED Backend.cpp)
target_link_libraries(nnkit_moco_tf_backend nnkit_support_moco_tf)
-target_link_libraries(nnkit_moco_tf_backend stdex)
-require("stdex")
# To use "nnkit_support_tftestinfo"
require("tfinfo")
require("loco")
target_link_libraries(nnkit_support_moco_tf locomotiv)
target_link_libraries(nnkit_support_moco_tf moco_tf_frontend)
target_link_libraries(nnkit_support_moco_tf loco)
-target_link_libraries(nnkit_support_moco_tf stdex)
#include <moco/tf/Frontend.h>
#include <moco/Names.h>
-#include <stdex/Memory.h>
#include <nncc/core/ADT/tensor/Buffer.h>
#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <memory>
#include <utility> // std::move
#include <stdexcept>
// set member vars
_loco_graph = std::move(loco_graph);
- _sess = stdex::make_unique<locomotiv::Session>(_loco_graph.get());
+ _sess = std::make_unique<locomotiv::Session>(_loco_graph.get());
}
void Backend::prepare(const std::function<void(nnkit::TensorContext &)> &f)
for (int n = 0; n < _inputs.size(); n++)
{
auto buf = make_buffer<float, LexicalLayout>(_inputs.at(n)->shape());
- buf_list.emplace_back(stdex::make_unique<nncc::core::ADT::tensor::Buffer<float>>(buf));
+ buf_list.emplace_back(std::make_unique<nncc::core::ADT::tensor::Buffer<float>>(buf));
}
// fill test input values
}
void InputTensorContext::getConstFloatTensor(
- uint32_t n, const nnkit::TensorContext::TypedReader<float> &f) const
+ uint32_t n, const nnkit::TensorContext::TypedReader<float> &f) const
{
auto buf = _buffers.at(n).get();
f(*this, n, *buf);
public:
InputTensorContext(const ParsedTensors &parsed_tensors, const Buffers &buffers)
- : TensorContext(parsed_tensors), _buffers(buffers)
+ : TensorContext(parsed_tensors), _buffers(buffers)
{ /* empty */
}
{
void OutputTensorContext::getConstFloatTensor(
- uint32_t n, const nnkit::TensorContext::TypedReader<float> &f) const
+ uint32_t n, const nnkit::TensorContext::TypedReader<float> &f) const
{ // for output
using nncc::core::ADT::tensor::LexicalLayout;
using nncc::core::ADT::tensor::make_overlay;
{
public:
OutputTensorContext(const ParsedTensors &parsed_tensors, locomotiv::Session *sess)
- : TensorContext(parsed_tensors), _sess(sess)
+ : TensorContext(parsed_tensors), _sess(sess)
{ /* empty */
}
#include "nnkit/support/onnx/Backend.h"
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
assert(args.size() == 1); // args.at[0] : onnx file
- return stdex::make_unique<::nnkit::support::onnx::Backend>(args.at(0));
+ return std::make_unique<::nnkit::support::onnx::Backend>(args.at(0));
}
add_library(nnkit_onnx_backend SHARED Backend.cpp)
target_link_libraries(nnkit_onnx_backend nnkit_support_onnx)
-target_link_libraries(nnkit_onnx_backend stdex)
-require("stdex")
require("nnkit-intf")
target_include_directories(nnkit_support_onnx-1.4 PUBLIC include)
target_link_libraries(nnkit_support_onnx-1.4 nnkit_intf_backend)
target_link_libraries(nnkit_support_onnx-1.4 onnxruntime)
-target_link_libraries(nnkit_support_onnx-1.4 stdex)
add_library(nnkit_support_onnx ALIAS nnkit_support_onnx-1.4)
{
public:
TensorSet(Allocator *allocator, size_t nums)
- : _allocator(allocator), _names(nums), _types(nums), _dims(nums), _tensors(nums, nullptr)
+ : _allocator(allocator), _names(nums), _types(nums), _dims(nums), _tensors(nums, nullptr)
{
// DO NOTHING
}
Status status;
status =
- OrtCreateTensorAsOrtValue(_allocator, dims.data(), dims.size(), type, &_tensors[index]);
+ OrtCreateTensorAsOrtValue(_allocator, dims.data(), dims.size(), type, &_tensors[index]);
status.throwOnError();
assert(OrtIsTensor(_tensors[index]));
#include "nnkit/support/onnx/Runner.h"
#include "nnkit/support/onnx/Status.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
namespace nnkit
namespace onnx
{
-Runner::Runner(const std::string &path) : _allocator(stdex::make_unique<Allocator>())
+Runner::Runner(const std::string &path) : _allocator(std::make_unique<Allocator>())
{
Status status;
status = OrtSessionGetInputCount(_session, &num_input_nodes);
status.throwOnError();
- _inputs = stdex::make_unique<TensorSet>(_allocator.get(), num_input_nodes);
+ _inputs = std::make_unique<TensorSet>(_allocator.get(), num_input_nodes);
for (size_t i = 0; i < num_input_nodes; ++i)
{
status = OrtSessionGetOutputCount(_session, &num_output_nodes);
status.throwOnError();
- _outputs = stdex::make_unique<TensorSet>(_allocator.get(), num_output_nodes);
+ _outputs = std::make_unique<TensorSet>(_allocator.get(), num_output_nodes);
for (size_t i = 0; i < num_output_nodes; ++i)
{
#include "nnkit/support/tf/Backend.h"
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
- using stdex::make_unique;
+ using std::make_unique;
assert(args.size() == 2); // args.at[0] : test.pb path, args.at[1] : test.info path
add_library(nnkit_tf_backend SHARED Backend.cpp)
target_link_libraries(nnkit_tf_backend nnkit_support_tf)
-target_link_libraries(nnkit_tf_backend stdex)
-require("stdex")
require("tfinfo")
require("nnkit-intf")
add_library(nnkit_support_tf-1.13 STATIC ${SOURCES})
set_target_properties(nnkit_support_tf-1.13 PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(nnkit_support_tf-1.13 PUBLIC include)
-target_link_libraries(nnkit_support_tf-1.13 nnkit_intf_backend stdex nnkit_support_tftestinfo)
+target_link_libraries(nnkit_support_tf-1.13 nnkit_intf_backend nnkit_support_tftestinfo)
target_link_libraries(nnkit_support_tf-1.13 tensorflow-1.13)
add_library(nnkit_support_tf ALIAS nnkit_support_tf-1.13)
{
public:
TensorContext(const std::vector<std::unique_ptr<ParsedTensor>> &tensors, TensorDataMap &data_map)
- : _tensors(tensors), _data_map(data_map)
+ : _tensors(tensors), _data_map(data_map)
{
// empty
}
class TensorDataMap
{
public:
- TensorDataMap() { /* empty */}
+ TensorDataMap()
+ { /* empty */
+ }
uint8_t *allocate(const ParsedTensor *parsed_tensor)
{
angkor::TensorShape shape;
if (!_tf_runner.getTensorShapeFromGraphDef(parsed_tensor, shape))
throw oops::UserExn(
- "Info you provided may be wrong or not enough. Please check the info file.");
+ "Info you provided may be wrong or not enough. Please check the info file.");
parsed_tensor->mutable_shape().resize(shape.rank());
for (int r = 0; r < shape.rank(); r++)
throw std::runtime_error("Not supported tensor type");
TF_Tensor *input_tensor =
- create_tensor(TF_FLOAT, shape.data(), shape.size(), data_map.data(tensor.get()),
- num_elements(tensor->shape()) * size);
+ create_tensor(TF_FLOAT, shape.data(), shape.size(), data_map.data(tensor.get()),
+ num_elements(tensor->shape()) * size);
_input_ops.emplace_back(input_op);
_input_tensors.emplace_back(input_tensor);
0, // Target operations, number of targets.
nullptr, // Run metadata.
_status // Output status.
- );
+ );
if (TF_GetCode(_status) != TF_OK)
throw std::runtime_error(TF_Message(_status));
std::unique_ptr<::tflite::FlatBufferModel> _model;
std::unique_ptr<::tflite::Interpreter> _interp;
};
-}
+} // namespace
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<GenericBackend>(args.at(0));
+ return std::make_unique<GenericBackend>(args.at(0));
}
add_library(nnkit_tflite_backend SHARED Backend.cpp)
target_link_libraries(nnkit_tflite_backend nnkit_support_tflite)
-target_link_libraries(nnkit_tflite_backend stdex)
-require("stdex")
require("nnkit-intf")
target_include_directories(nnkit_HDF5_export_action PRIVATE ${HDF5_INCLUDE_DIRS})
target_link_libraries(nnkit_HDF5_export_action nnkit_intf_action)
target_link_libraries(nnkit_HDF5_export_action nnkit_HDF5_common)
-target_link_libraries(nnkit_HDF5_export_action stdex)
add_library(nnkit_HDF5_import_action SHARED Import.cpp)
target_include_directories(nnkit_HDF5_import_action PRIVATE ${HDF5_INCLUDE_DIRS})
target_link_libraries(nnkit_HDF5_import_action nnkit_intf_action)
target_link_libraries(nnkit_HDF5_import_action nnkit_HDF5_common)
-target_link_libraries(nnkit_HDF5_import_action stdex)
H5::DataSpace dataspace(rank, dims);
auto dataset =
- _value_grp.createDataSet(value_filename(n), H5::PredType::IEEE_F32BE, dataspace);
+ _value_grp.createDataSet(value_filename(n), H5::PredType::IEEE_F32BE, dataspace);
float *data = new float[nncc::core::ADT::tensor::num_elements(shape)];
H5::StrType name_datatype(H5::PredType::C_S1, name.size());
auto name_attr =
- _name_grp.createAttribute(value_filename(n), name_datatype, name_dataspace);
+ _name_grp.createAttribute(value_filename(n), name_datatype, name_dataspace);
name_attr.write(name_datatype, name);
}
};
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<HD5ExportAction>(args.at(0));
+ return std::make_unique<HD5ExportAction>(args.at(0));
}
};
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<HD5ImportAction>(args.at(0));
+ return std::make_unique<HD5ImportAction>(args.at(0));
}
add_library(nnkit_show_action SHARED Show.cpp)
target_link_libraries(nnkit_show_action nnkit_intf_action)
-target_link_libraries(nnkit_show_action stdex)
add_library(nnkit_randomize_action SHARED Randomize.cpp)
target_link_libraries(nnkit_randomize_action nnkit_intf_action)
-target_link_libraries(nnkit_randomize_action stdex)
};
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<RandomizeAction>();
+ return std::make_unique<RandomizeAction>();
}
}
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<ShowAction>();
+ return std::make_unique<ShowAction>();
}
add_executable(nnkit-benchmark ${SOURCES})
target_link_libraries(nnkit-benchmark nnkit_support_cmdline)
target_link_libraries(nnkit-benchmark nnkit_support_backend)
-target_link_libraries(nnkit-benchmark stdex)
#include <nnkit/VectorArguments.h>
#include <nnkit/BackendPlugin.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <string>
#include <iostream>
#include <iomanip>
-using stdex::make_unique;
+using std::make_unique;
using std::chrono::milliseconds;
using std::chrono::microseconds;
target_link_libraries(nnkit-run nnkit_intf_backend)
target_link_libraries(nnkit-run nnkit_support_cmdline)
target_link_libraries(nnkit-run nnkit_support_backend)
-target_link_libraries(nnkit-run stdex)
private:
nnkit::VectorArguments _args;
};
-}
+} // namespace
namespace
{
std::string _path;
std::unique_ptr<nnkit::BackendPlugin> _plugin;
};
-}
+} // namespace
// TODO Extract Action-related helpers
#include <nnkit/Action.h>
void *_handle;
Entry _entry;
};
-}
+} // namespace
namespace
{
private:
ActionBinder _binder;
};
-}
-
-#include <stdex/Memory.h>
+} // namespace
+#include <memory>
#include <map>
#include <iostream>
std::map<std::string, std::function<void(const std::string &arg)>> argparse;
argparse["--backend"] = [§ions](const std::string &tag) {
- sections.backend = stdex::make_unique<BackendSection>(tag);
+ sections.backend = std::make_unique<BackendSection>(tag);
};
argparse["--backend-arg"] = [§ions](const std::string &arg) {
{
public:
PadInfo(uint32_t top, uint32_t bottom, uint32_t left, uint32_t right)
- : _top{top}, _bottom{bottom}, _left{left}, _right{right}
+ : _top{top}, _bottom{bottom}, _left{left}, _right{right}
{
// DO NOTHING
}
uint32_t _vertical;
};
-} // namespace nncc
+} // namespace nnop
#endif // __NNOP_STRIDE_INFO_H__
{
RandomModel::RandomModel(int32_t seed)
- : _ifm_shape{1, 8, 8}, _ifm_name{"ifm"}, _ofm_name{"ofm"}, _ofm_shape{2, 6, 6},
- _ker_buffer{kernel::Shape{2, 1, 3, 3}, kernel::NCHWLayout{}}
+ : _ifm_shape{1, 8, 8}, _ifm_name{"ifm"}, _ofm_name{"ofm"}, _ofm_shape{2, 6, 6},
+ _ker_buffer{kernel::Shape{2, 1, 3, 3}, kernel::NCHWLayout{}}
{
std::default_random_engine gen{static_cast<uint32_t>(seed)};
std::normal_distribution<float> dist{0.0f, 1.0f};
add_library(nnsuite_conv_caffe SHARED ${SOURCES})
target_link_libraries(nnsuite_conv_caffe nnsuite_conv)
target_link_libraries(nnsuite_conv_caffe nnkit_support_caffe)
-target_link_libraries(nnsuite_conv_caffe stdex)
nnas_find_package(GTest QUIET)
#include <nncc/core/ADT/kernel/Overlay.h>
#include <nncc/core/ADT/kernel/NCHWLayout.h>
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
std::unique_ptr<nnkit::Backend> ConvBackend::create(const nnsuite::conv::Model &model)
{
TestModel(const std::string &ifm_name, const feature::Shape &ifm_shape,
const std::string &ofm_name, const feature::Shape &ofm_shape,
const kernel::Shape &ker_shape, const kernel::Layout &ker_layout, float *ker_data)
- : _ifm_name(ifm_name), _ifm_shape(ifm_shape), _ofm_name(ofm_name), _ofm_shape(ofm_shape),
- _ker{ker_shape, ker_layout, ker_data}
+ : _ifm_name(ifm_name), _ifm_shape(ifm_shape), _ofm_name(ofm_name),
+ _ofm_shape(ofm_shape), _ker{ker_shape, ker_layout, ker_data}
{
// DO NOTHING
}
add_library(nnsuite_conv_tflite SHARED ${SOURCES})
target_link_libraries(nnsuite_conv_tflite nnsuite_conv)
target_link_libraries(nnsuite_conv_tflite nnkit_support_tflite-1.7)
-target_link_libraries(nnsuite_conv_tflite stdex)
nnas_find_package(GTest QUIET)
}
ConvBackend::ConvBackend(const nnsuite::conv::Model &model)
- : _ifm_name{model.ifm_name()}, _ofm_name{model.ofm_name()}
+ : _ifm_name{model.ifm_name()}, _ofm_name{model.ofm_name()}
{
using nncc::core::ADT::kernel::Overlay;
using nncc::core::ADT::kernel::NHWCLayout;
as_dims(model.ifm_shape()), quantization);
_interp.SetTensorParametersReadOnly(
- 2, kTfLiteFloat32 /* type */, "kernel" /* name */, as_dims(model.ker_shape()), quantization,
- reinterpret_cast<const char *>(_kernel.data()), _kernel.size() * sizeof(float));
+ 2, kTfLiteFloat32 /* type */, "kernel" /* name */, as_dims(model.ker_shape()), quantization,
+ reinterpret_cast<const char *>(_kernel.data()), _kernel.size() * sizeof(float));
_interp.SetTensorParametersReadOnly(
- 3, kTfLiteFloat32 /* type */, "bias" /* name */, {static_cast<int>(_bias.size())},
- quantization, reinterpret_cast<const char *>(_bias.data()), _bias.size() * sizeof(float));
+ 3, kTfLiteFloat32 /* type */, "bias" /* name */, {static_cast<int>(_bias.size())}, quantization,
+ reinterpret_cast<const char *>(_bias.data()), _bias.size() * sizeof(float));
auto param = typed_malloc<TfLiteConvParams>();
TestModel(const std::string &ifm_name, const feature::Shape &ifm_shape,
const std::string &ofm_name, const feature::Shape &ofm_shape,
const kernel::Shape &ker_shape, const kernel::Layout &ker_layout, float *ker_data)
- : _ifm_name(ifm_name), _ifm_shape(ifm_shape), _ofm_name(ofm_name), _ofm_shape(ofm_shape),
- _ker{ker_shape, ker_layout, ker_data}
+ : _ifm_name(ifm_name), _ifm_shape(ifm_shape), _ofm_name(ofm_name),
+ _ofm_shape(ofm_shape), _ker{ker_shape, ker_layout, ker_data}
{
// DO NOTHING
}
#include <nnkit/Backend.h>
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <chrono>
#include <iostream>
const nnsuite::conv::RandomModel model{seed};
- return stdex::make_unique<ConvBackend>(model);
+ return std::make_unique<ConvBackend>(model);
}
one-import-bcq
one-import-tf
one-import-tflite
+ one-import-onnx
one-optimize
one-quantize
one-pack
return()
endif(NOT ENABLE_TEST)
+add_subdirectory(dummy-driver)
add_subdirectory(tests)
+add_subdirectory(validate-onnx2circle)
--- /dev/null
+# dummy driver for interface test
+set(DUMMY_DRIVER_SRC src/dummy-compile.cpp)
+set(HELP_DRIVER_SRC src/help-compile.cpp)
+
+add_executable(dummy-compile ${DUMMY_DRIVER_SRC})
+add_executable(help-compile ${HELP_DRIVER_SRC})
+
+set(DUMMY_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/dummy-compile")
+set(HELP_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/help-compile")
+
+install(FILES ${DUMMY_DRIVER}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${HELP_DRIVER}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummy-compile only tests its interface rather than its functionality.
+ *
+ * ./dummy-compile -o ${OUTPUT_NAME} ${INPUT_NAME}
+ *
+ * NOTE argv[3](INPUT_NAME) is not used here.
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 4)
+ return EXIT_FAILURE;
+
+ std::string opt_o{"-o"};
+ std::string argv_1{argv[1]};
+
+ if (opt_o != argv_1)
+ return EXIT_FAILURE;
+
+ std::string output_name{argv[2]};
+ std::ofstream outfile(output_name);
+
+ outfile << "dummy-compile dummy output!!" << std::endl;
+
+ outfile.close();
+
+ return EXIT_SUCCESS;
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * help-compile prints dummy help message.
+ *
+ * $ ./help-compile -h
+ * HELP MESSAGE!!
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::string opt_h{"-h"};
+ std::string argv_1{argv[1]};
+
+ if (opt_h != argv_1)
+ return EXIT_FAILURE;
+
+ std::cout << "HELP MESSAGE!!" << std::endl;
+
+ return EXIT_SUCCESS;
+}
version 2.3.0, recommended 2.x version as of now, so that 'one-import-tf'
command can execute properly.
+'one-prepare-venv' will also prepare onnx and onnx-tensorflow version 1.7.0 so
+that 'one-import-onnx' command can execute properly.
+
Prerequisite
------------
one-optimize provides the network or operator transformations shown below.
Current transformation options are
+- disable_validation : This will turn off operator validations.
+- fold_add_v2 : This removes AddV2 operation which can be folded
+- fold_cast : This removes Cast operation which can be folded
- fold_dequantize : This removes Dequantize operation which can be folded
+- fold_sparse_to_dense : This removes SparseToDense operation which can be folded
+- forward_reshape_to_unaryop: This will move Reshape after UnaryOp for certain conditions
- fuse_add_with_tconv: This fuses Add operator with the preceding TConv operator if possible
+- fuse_batchnorm_with_conv : This fuses BatchNorm operator to convolution operator
+- fuse_batchnorm_with_dwconv : This fuses BatchNorm operator to depthwise convolution operator
+- fuse_batchnorm_with_tconv : This fuses BatchNorm operator to transpose convolution operator
- fuse_bcq: This enables Binary-Coding-based Quantized DNNs
- read https://arxiv.org/abs/2005.09904 for detailed information
- fuse_instnorm: This will convert instance normalization related operators to
- make_batchnorm_gamma_positive: This makes negative gamma of batch normalization into a small positive value (1e-10).
Note that this pass can change the execution result of the model.
So, use it only when the impact is known to be acceptable.
+- mute_warnings : This will turn off warning messages.
+- generate_profile_data : This will turn on profiling data generation.
+- remove_redundant_reshape : This fuses or removes redundant reshape operators.
+- remove_redundant_transpose : This fuses or removes redundant transpose operators.
+- remove_unnecessary_reshape : This removes unnecessary reshape operators.
+- remove_unnecessary_slice : This removes unnecessary slice operators.
+- remove_unnecessary_strided_slice : This removes unnecessary strided slice operators.
+- remove_unnecessary_split : This removes unnecessary split operators.
- replace_cw_mul_add_with_depthwise_conv: This will replace channel-wise Mul/Add with DepthwiseConv2D.
- resolve_customop_add: This will convert Custom(Add) to normal Add operator
- resolve_customop_batchmatmul: This will convert Custom(BatchMatMul) to
normal BatchMatMul operator
- resolve_customop_matmul: This will convert Custom(MatMul) to normal MatMul
operator
+- shuffle_weight_to_16x1float32 : This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32.
+ Note that it only converts weights whose row is a multiple of 16.
+- substitute_pack_to_reshape : This will convert single input Pack to Reshape.
+- substitute_squeeze_to_reshape : This will convert Squeeze to Reshape under certain conditions.
+- substitute_transpose_to_reshape : This will convert Transpose to Reshape under certain conditions.
+- transform_min_max_to_relu6: This will transform Minimum-Maximum pattern to Relu6 operator.
+
+For convenience, there are grouped options that enable several optimizations at once (see the example after this list).
+- O1: fuse_bcq, fuse_instnorm, resolve_customop_add, resolve_customop_batchmatmul,
+ resolve_customop_matmul, remove_redundant_transpose, substitute_pack_to_reshape
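
As an illustrative sketch, the grouped option can be passed to one-optimize
directly on the command line (the file names below are only examples, matching
the configuration samples used elsewhere in this change):

  one-optimize --O1 \
    --input_path inception_v3.circle \
    --output_path inception_v3.opt.circle
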
one-quantize
''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
#
'one-import-bcq': 'one-import-bcq',
'one-import-tf': 'one-import-tf',
'one-import-tflite': 'one-import-tflite',
+ 'one-import-onnx': 'one-import-onnx',
'one-optimize': 'one-optimize',
+ 'one-quantize': 'one-quantize',
'one-pack': 'one-pack',
'one-codegen': 'one-codegen'
}[driver_name]
def _verify_cfg(driver_list, config):
if not config.has_section('one-build'):
- raise ImportError('\'one-build\' section is required in configuraion file')
+        raise ImportError('[one-build] section is required in configuration file')
import_driver_cnt = 0
if _is_available_driver(config, 'one-import-tf'):
import_driver_cnt += 1
if _is_available_driver(config, 'one-import-bcq'):
import_driver_cnt += 1
+ if _is_available_driver(config, 'one-import-onnx'):
+ import_driver_cnt += 1
if import_driver_cnt > 1:
raise AssertionError('Only one import-* driver can be executed')
# verify configuration file
drivers = [
- 'one-import-tf', 'one-import-tflite', 'one-import-bcq', 'one-optimize',
- 'one-quantize', 'one-pack', 'one-codegen'
+ 'one-import-tf', 'one-import-tflite', 'one-import-bcq', 'one-import-onnx',
+ 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen'
]
_verify_cfg(drivers, config)
one-import-tf=True
one-import-tflite=False
one-import-bcq=False
+one-import-onnx=False
one-optimize=True
one-quantize=False
one-pack=True
[one-optimize]
input_path=inception_v3.circle
output_path=inception_v3.opt.circle
+generate_profile_data=False
[one-pack]
input_path=inception_v3.opt.circle
# limitations under the License.
import argparse
+import copy
+import itertools
import os
import subprocess
import sys
def _get_parser():
- parser = argparse.ArgumentParser(description='command line tool for code generation')
+ codegen_usage = 'one-codegen [-h] [-v] [-C CONFIG] [-b BACKEND] [--] [COMMANDS FOR BACKEND]'
+ parser = argparse.ArgumentParser(
+ description='command line tool for code generation', usage=codegen_usage)
_utils._add_default_arg(parser)
def _parse_arg(parser):
- args, unknown_args = parser.parse_known_args()
+ codegen_args = []
+ backend_args = []
+ unknown_args = []
+ argv = copy.deepcopy(sys.argv)
+ # delete file name
+ del argv[0]
+ # split by '--'
+ args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
+ # one-codegen has two interfaces
+ # 1. one-codegen [-h] [-v] [-C CONFIG] [-b BACKEND] [COMMANDS FOR BACKEND]
+ if len(args) == 1:
+ codegen_args = args[0]
+ codegen_args, unknown_args = parser.parse_known_args(codegen_args)
+ # 2. one-codegen [-h] [-v] [-C CONFIG] [-b BACKEND] -- [COMMANDS FOR BACKEND]
+ if len(args) == 2:
+ codegen_args = args[0]
+ backend_args = args[1]
+ codegen_args = parser.parse_args(codegen_args)
# print version
- if args.version:
+ if len(args) and codegen_args.version:
_utils._print_version_and_exit(__file__)
- return args, unknown_args
+ return codegen_args, backend_args, unknown_args
def main():
# parse arguments
parser = _get_parser()
- args, unknown_args = _parse_arg(parser)
+ args, backend_args, unknown_args = _parse_arg(parser)
# parse configuration file
_utils._parse_cfg(args, 'one-codegen')
# make a command to run given backend driver
dir_path = os.path.dirname(os.path.realpath(__file__))
codegen_path = os.path.join(dir_path, getattr(args, 'backend') + '-compile')
- codegen_cmd = [codegen_path] + unknown_args
+ codegen_cmd = [codegen_path] + backend_args + unknown_args
if _utils._is_valid_attr(args, 'command'):
codegen_cmd += getattr(args, 'command').split()
bufsize=1) as p:
for line in p.stdout:
sys.stdout.buffer.write(line)
+ if p.returncode != 0:
+ sys.exit(p.returncode)
if __name__ == '__main__':
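
For reference, the two invocation forms accepted by the parser above look as
follows; the dummy backend and file names are the ones used by the tests added
in this change:

  # 1. backend arguments follow the one-codegen options directly
  one-codegen -b dummy -o sample.tvn dummy.circle

  # 2. an explicit '--' separates one-codegen options from backend arguments
  one-codegen -b dummy -- -o sample.tvn dummy.circle
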
--- /dev/null
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import subprocess
+import sys
+import tempfile
+import onnx
+import onnx_tf
+
+import utils as _utils
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to convert ONNX to circle')
+
+ _utils._add_default_arg(parser)
+
+ ## tf2tfliteV2 arguments
+ tf2tfliteV2_group = parser.add_argument_group('converter arguments')
+
+ # input and output path.
+ tf2tfliteV2_group.add_argument(
+ '-i', '--input_path', type=str, help='full filepath of the input file')
+ tf2tfliteV2_group.add_argument(
+ '-o', '--output_path', type=str, help='full filepath of the output file')
+
+ # input and output arrays.
+ tf2tfliteV2_group.add_argument(
+ '-I',
+ '--input_arrays',
+ type=str,
+ help='names of the input arrays, comma-separated')
+ tf2tfliteV2_group.add_argument(
+ '-O',
+ '--output_arrays',
+ type=str,
+ help='names of the output arrays, comma-separated')
+
+ # fixed options
+ tf2tfliteV2_group.add_argument('--model_format', default='saved_model')
+ tf2tfliteV2_group.add_argument('--converter_version', default='v2')
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments is given
+ missing = []
+ if not _utils._is_valid_attr(args, 'input_path'):
+ missing.append('-i/--input_path')
+ if not _utils._is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ _utils._print_version_and_exit(__file__)
+
+ return args
+
+
+def _convert(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
+ # convert onnx to tf saved model
+ onnx_model = onnx.load(getattr(args, 'input_path'))
+ tf_savedmodel = onnx_tf.backend.prepare(onnx_model)
+
+ savedmodel_name = os.path.splitext(os.path.basename(
+ args.output_path))[0] + '.savedmodel'
+ savedmodel_output_path = os.path.join(tmpdir, savedmodel_name)
+ tf_savedmodel.export_graph(savedmodel_output_path)
+
+ # make a command to convert from tf to tflite
+ tf2tfliteV2_path = os.path.join(dir_path, 'tf2tfliteV2.py')
+ tf2tfliteV2_output_name = os.path.splitext(os.path.basename(
+ args.output_path))[0] + '.tflite'
+ tf2tfliteV2_output_path = os.path.join(tmpdir, tf2tfliteV2_output_name)
+
+ tf2tfliteV2_cmd = _utils._make_tf2tfliteV2_cmd(
+ args, tf2tfliteV2_path, savedmodel_output_path, tf2tfliteV2_output_path)
+
+ f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
+
+ # convert tf to tflite
+ with subprocess.Popen(
+ tf2tfliteV2_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+ bufsize=1) as p:
+ for line in p.stdout:
+ sys.stdout.buffer.write(line)
+ f.write(line)
+ if p.returncode != 0:
+ sys.exit(p.returncode)
+
+ # make a command to convert from tflite to circle
+ tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
+ tflite2circle_cmd = _utils._make_tflite2circle_cmd(tflite2circle_path,
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
+
+ f.write((' '.join(tflite2circle_cmd) + '\n').encode())
+
+ # convert tflite to circle
+ with subprocess.Popen(
+ tflite2circle_cmd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ bufsize=1) as p:
+ for line in p.stdout:
+ sys.stdout.buffer.write(line)
+ f.write(line)
+ if p.returncode != 0:
+ sys.exit(p.returncode)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # parse configuration file
+ _utils._parse_cfg(args, 'one-import-onnx')
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # convert
+ _convert(args)
+
+
+if __name__ == '__main__':
+ main()
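
A minimal invocation of the new driver, mirroring the interface test added
below (test_onnx_model.onnx is the model fetched by prepare_test_materials.sh):

  one-import-onnx \
    --input_path test_onnx_model.onnx \
    --output_path test_onnx_model.circle
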
for line in p.stdout:
sys.stdout.buffer.write(line)
f.write(line)
+ if p.returncode != 0:
+ sys.exit(p.returncode)
def main():
_utils._add_default_arg(parser)
+ ## utility arguments
+ utility_group = parser.add_argument_group('arguments for utility')
+
+ utility_group.add_argument(
+ '-p',
+ '--generate_profile_data',
+ action='store_true',
+ help='generate profiling data')
+
## circle2circle arguments
circle2circle_group = parser.add_argument_group('arguments for optimization')
'-o', '--output_path', type=str, help='full filepath of the output file')
# optimization pass
- circle2circle_group.add_argument(
- '--all', action='store_true', help='enable all optimization pass')
- circle2circle_group.add_argument(
- '--fold_dequantize', action='store_true', help='fold Dequantize op')
- circle2circle_group.add_argument(
- '--fuse_add_with_tconv', action='store_true', help='fuse Add op to Transposed')
- circle2circle_group.add_argument(
- '--fuse_batchnorm_with_tconv',
- action='store_true',
- help='fuse BatchNorm op to Transposed Convolution op')
- circle2circle_group.add_argument(
- '--fuse_bcq', action='store_true', help='apply Binary Coded Quantization')
- circle2circle_group.add_argument(
- '--fuse_preactivation_batchnorm',
- action='store_true',
- help='fuse BatchNorm operators of pre-activations to Convolution op')
- circle2circle_group.add_argument(
- '--make_batchnorm_gamma_positive',
- action='store_true',
- help="""make negative gamma of BatchNorm to a small positive value (1e-10).
- Note that this pass can change the execution result of the model.
- So, use it only when the impact is known to be acceptable.""")
- circle2circle_group.add_argument(
- '--fuse_activation_function',
- action='store_true',
- help='fuse Activation function to a preceding operator')
- circle2circle_group.add_argument(
- '--fuse_instnorm', action='store_true', help='fuse ops to InstanceNorm operator')
- circle2circle_group.add_argument(
- '--replace_cw_mul_add_with_depthwise_conv',
- action='store_true',
- help='replace channel-wise Mul/Add with DepthwiseConv2D')
- circle2circle_group.add_argument(
- '--resolve_customop_add',
- action='store_true',
- help='convert Custom(Add) op to Add op')
- circle2circle_group.add_argument(
- '--resolve_customop_batchmatmul',
- action='store_true',
- help='convert Custom(BatchMatmul) op to BatchMatmul op')
- circle2circle_group.add_argument(
- '--resolve_customop_matmul',
- action='store_true',
- help='convert Custom(Matmul) op to Matmul op')
+ for opt in _utils._CONSTANT.OPTIMIZATION_OPTS:
+ # opt = (option_name, help_message)
+ circle2circle_group.add_argument('--' + opt[0], action='store_true', help=opt[1])
return parser
python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
install Pillow==6.2.2
+# Install PyTorch and ONNX related
+python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
+ --trusted-host download.pytorch.org \
+ install torch==1.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+
+# NOTE The latest onnx 1.8.1 has a compatibility issue with onnx-tf 1.7.0,
+# so onnx MUST be pinned to 1.8.0.
+# Allow installing a custom onnx-tf wheel via EXT_ONNX_TF_WHL.
+if [ -n "${EXT_ONNX_TF_WHL}" ]; then
+ python -m pip --default-timeout=1000 install onnx==1.8.0 ${EXT_ONNX_TF_WHL}
+else
+ python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
+ install onnx==1.8.0 onnx-tf==1.7.0
+fi
+
# Create python symbolic link
rm -f ${DRIVER_PATH}/python
ln -s venv/bin/python ${DRIVER_PATH}/python
parser.add_argument(
'-i', '--input_path', type=str, help='full filepath of the input file')
parser.add_argument(
- '-d', '--input_data', type=str, help='full filepath of the input data file')
+ '-d',
+ '--input_data',
+ type=str,
+ help=
+ 'full filepath of the input data file. if not specified, run with random input data.'
+ )
parser.add_argument(
'-o', '--output_path', type=str, help='full filepath of the output file')
+ # argument for profiling
+ parser.add_argument(
+ '-p',
+ '--generate_profile_data',
+ action='store_true',
+ help='generate profiling data')
+
## arguments for quantization
quantization_group = parser.add_argument_group('arguments for quantization')
type=str,
help='record mode (supported: percentile/moving_average, default=percentile)')
- # set default values
- quantization_group.set_defaults(
- input_dtype='float32',
- quantized_dtype='uint8',
- granularity='layer',
- min_percentile='1.0',
- max_percentile='99.0',
- mode='percentile')
-
return parser
+def _set_default_values(args):
+ if not _utils._is_valid_attr(args, 'input_dtype'):
+ setattr(args, 'input_dtype', 'float32')
+ if not _utils._is_valid_attr(args, 'quantized_dtype'):
+ setattr(args, 'quantized_dtype', 'uint8')
+ if not _utils._is_valid_attr(args, 'granularity'):
+ setattr(args, 'granularity', 'layer')
+ if not _utils._is_valid_attr(args, 'mode'):
+ setattr(args, 'mode', 'percentile')
+ if not _utils._is_valid_attr(args, 'min_percentile'):
+ setattr(args, 'min_percentile', '1.0')
+ if not _utils._is_valid_attr(args, 'max_percentile'):
+ setattr(args, 'max_percentile', '99.0')
+
+
def _verify_arg(parser, args):
"""verify given arguments"""
# check if required arguments is given
missing = []
if not _utils._is_valid_attr(args, 'input_path'):
missing.append('-i/--input_path')
- if not _utils._is_valid_attr(args, 'input_data'):
- missing.append('-d/--input_data')
if not _utils._is_valid_attr(args, 'output_path'):
missing.append('-o/--output_path')
if len(missing):
tmpdir,
os.path.splitext(os.path.basename(args.input_path))[0]) + '1.circle'
circle_quantizer_cmd.append(tmp_output_path_1)
+ # profiling
+ if _utils._is_valid_attr(args, 'generate_profile_data'):
+ circle_quantizer_cmd.append('--generate_profile_data')
f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
if _utils._is_valid_attr(args, 'mode'):
circle_record_minmax_cmd.append('--mode')
circle_record_minmax_cmd.append(getattr(args, 'mode'))
+ # profiling
+ if _utils._is_valid_attr(args, 'generate_profile_data'):
+ circle_record_minmax_cmd.append('--generate_profile_data')
f.write((' '.join(circle_record_minmax_cmd) + '\n').encode())
circle_quantizer_cmd.append(tmp_output_path_2)
if _utils._is_valid_attr(args, 'output_path'):
circle_quantizer_cmd.append(getattr(args, 'output_path'))
+ # profiling
+ if _utils._is_valid_attr(args, 'generate_profile_data'):
+ circle_quantizer_cmd.append('--generate_profile_data')
f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
# parse configuration file
_utils._parse_cfg(args, 'one-quantize')
+ # set default values
+ _set_default_values(args)
+
# verify arguments
_verify_arg(parser, args)
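
With these defaults in place and -d/--input_data no longer required,
one-quantize can be run without an input data file; a minimal sketch
(calibration then uses random input data, and the dtype, granularity and mode
arguments fall back to the defaults set above):

  one-quantize \
    --input_path ./inception_v3.circle \
    --output_path ./inception_v3.random.quantized.circle
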
file(APPEND "${DRIVER_SCRIPT}" " export PATH=$USER_PATH:$PATH\n")
file(APPEND "${DRIVER_SCRIPT}" "fi\n")
file(APPEND "${DRIVER_SCRIPT}" "\n")
+file(APPEND "${DRIVER_SCRIPT}" "# refer https://github.com/Samsung/ONE/issues/6286\n")
+file(APPEND "${DRIVER_SCRIPT}" "set -o pipefail\n\n")
+file(APPEND "${DRIVER_SCRIPT}" "fail_count=0\n")
+file(APPEND "${DRIVER_SCRIPT}" "trap \"(( fail_count++ ))\" ERR\n\n")
foreach(TESTITEM IN ITEMS ${TESTITEMS})
get_filename_component(ITEM_PREFIX ${TESTITEM} NAME_WE)
install(FILES ${CONFIGITEM} DESTINATION test)
endforeach(CONFIGITEM)
-file(APPEND "${DRIVER_SCRIPT}" "popd> /dev/null")
+file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n")
+
+file(APPEND "${DRIVER_SCRIPT}"
+"if [[ $fail_count != 0 ]]; then
+ echo \"$fail_count TESTS FAILED\"
+ exit 255
+else
+ echo \"ALL TESTS PASSED!\"
+fi\n
+")
set(PREPARE_TEST_MATERIALS_SH "${CMAKE_CURRENT_SOURCE_DIR}/prepare_test_materials.sh")
set(PREPROCESS_IMAGES_PY "${CMAKE_CURRENT_SOURCE_DIR}/preprocess_images.py")
# See the License for the specific language governing permissions and
# limitations under the License.
+# one-import-tf -> one-optimize
+
filename_ext="$(basename -- $0)"
filename="${filename_ext%.*}"
# See the License for the specific language governing permissions and
# limitations under the License.
+# one-import-tf -> one-optimize -> one-pack
+
filename_ext="$(basename -- $0)"
filename="${filename_ext%.*}"
--- /dev/null
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-quantize]
+input_path=inception_v3.circle
+output_path=inception_v3.quantized.circle
+input_data=inception_v3_test_data.h5
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_003.cfg"
+outputfile="inception_v3.quantized.circle"
+
+rm -rf ${outputfile}
+
+# run test
+one-build -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-codegen]
+backend=dummy
+command=-o sample.tvn inception_v3.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_004.cfg"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=False
+one-import-tflite=True
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-tflite]
+input_path=inception_v3.tflite
+output_path=inception_v3.circle
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+
+[one-codegen]
+backend=dummy
+command=-o sample.tvn inception_v3.opt.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tflite -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_005.cfg"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=True
+one-pack=False
+one-codegen=True
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+
+[one-quantize]
+input_path=inception_v3.opt.circle
+output_path=inception_v3.quantized.circle
+input_data=inception_v3_test_data.h5
+
+[one-codegen]
+backend=dummy
+command=-o sample.tvn inception_v3.quantized.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-optimize -> one-quantize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_006.cfg"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=True
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+
+[one-quantize]
+input_path=inception_v3.opt.circle
+output_path=inception_v3.quantized.circle
+input_data=inception_v3_test_data.h5
+
+[one-pack]
+input_path=inception_v3.quantized.circle
+output_path=inception_v3_pkg
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-optimize -> one-quantize -> one-pack
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_007.cfg"
+outputfile="inception_v3_pkg"
+
+rm -rf ${outputfile}
+
+# run test
+one-build -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+
+[one-optimize]
+input_path=test_onnx_model.circle
+output_path=test_onnx_model.opt.circle
+all=True
+remove_redundant_transpose=True
+
+[one-codegen]
+backend=dummy
+command=-o test_onnx_model.bin test_onnx_model.opt.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-onnx -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_008.cfg"
+outputfile="test_onnx_model.bin"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-onnx]
+input_path=onnx_conv2d_conv2d.onnx
+output_path=onnx_conv2d_conv2d.circle
+
+[one-optimize]
+input_path=onnx_conv2d_conv2d.circle
+output_path=onnx_conv2d_conv2d.opt.circle
+all=True
+remove_redundant_transpose=True
+convert_nchw_to_nhwc=True
+
+[one-codegen]
+backend=dummy
+command=-o onnx_conv2d_conv2d.bin onnx_conv2d_conv2d.opt.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-onnx -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_009.cfg"
+outputfile="onnx_conv2d_conv2d.bin"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
trap_err_onexit()
{
- if grep -q "'one-build' section is required in configuraion file" "${filename}.log"; then
+  if grep -q "\[one-build\] section is required in configuration file" "${filename}.log"; then
echo "${filename_ext} SUCCESS"
exit 0
fi
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# copy help-compile to bin folder
+cp help-compile ../bin/help-compile
+
+# run test
+one-codegen -b help -- -h > ${filename}.log
+
+rm -rf ../bin/help-compile
+
+if grep -q "HELP MESSAGE!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run one-codegen with dummy-compile driver
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-codegen -b dummy -o ${outputfile} "dummy.circle"
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run one-codegen with dummy-compile driver
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-codegen -b dummy -- -o ${outputfile} "dummy.circle"
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# print one-codegen's help message
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-codegen -h > ${filename}.log
+
+if grep -q "command line tool for code generation" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with no input
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "error: the following arguments are required" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-codegen > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./test_onnx_model.onnx"
+outputfile="./test_onnx_model.circle"
+
+rm -rf ${outputfile}
+rm -rf ${outputfile}.log
+
+# run test
+one-import-onnx \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${outputfile}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-import_005.cfg"
+outputfile="test_onnx_model.circle"
+
+rm -f ${outputfile}
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
fi
# run test
-one-optimize --all \
+one-optimize --O1 \
--input_path ${inputfile} \
--output_path ${outputfile} >> /dev/null
rm -rf ${outputfile}.log
# run test
-one-optimize --all \
+one-optimize --O1 \
--input_path ${inputfile} \
--output_path ${outputfile} > ${filename}.log
rm -rf ${outputfile}.log
# run test
-one-optimize --all \
+one-optimize --O1 \
--input_path ${inputfile} \
--output_path ${outputfile} > ${filename}.log
fi
# run test
-one-optimize --all \
+one-optimize --O1 \
--input_path "${inputfile}" > "${filename}.log" 2>&1
echo "${filename_ext} FAILED"
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.random.quantized.circle"
+
+rm -rf ${outputfile}
+
+# to create inception_v3.circle
+if [[ ! -s ${inputfile} ]]; then
+ /bin/bash one-import_001.test >> /dev/null
+ return_code=$?
+ if [[ ${return_code} != 0 ]]; then
+ trap_err_onexit
+ fi
+fi
+
+# run test without input data
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ./inception_v3.circle \
+--output_path ./inception_v3.random.quantized.circle >> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
# https://github.com/Samsung/ONE/issues/4268#issuecomment-725025805
fi
+if [[ ! -s "test_onnx_model.onnx" ]]; then
+ rm -rf test_onnx_model.zip
+ wget https://github.com/Samsung/ONE/files/5768243/test_onnx_model.zip
+ unzip test_onnx_model.zip
+ # https://github.com/Samsung/ONE/issues/5548#issuecomment-754373360
+fi
+
+if [[ ! -s "onnx_conv2d_conv2d.onnx" ]]; then
+ rm -rf onnx_conv2d_conv2d.zip
+ wget https://github.com/Samsung/ONE/files/5774648/onnx_conv2d_conv2d.zip
+ unzip onnx_conv2d_conv2d.zip
+ # https://github.com/Samsung/ONE/issues/5577#issuecomment-755078444
+fi
+
# prepare 'inception_v3.circle' file used for quantization test
inputfile="./inception_v3.pb"
outputfile="./inception_v3.circle"
import sys
+class _CONSTANT:
+ __slots__ = () # This prevents access via __dict__.
+ OPTIMIZATION_OPTS = (
+ # (OPTION_NAME, HELP_MESSAGE)
+ ('O1', 'enable O1 optimization pass'),
+ ('convert_nchw_to_nhwc',
+ 'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.'
+ ),
+ ('nchw_to_nhwc_preserve_input_shape',
+ 'preserve the input shape of the model (argument for convert_nchw_to_nhwc)'),
+ ('nchw_to_nhwc_preserve_output_shape',
+ 'preserve the output shape of the model (argument for convert_nchw_to_nhwc)'),
+ ('fold_add_v2', 'fold AddV2 op with constant inputs'),
+ ('fold_cast', 'fold Cast op with constant input'),
+ ('fold_dequantize', 'fold Dequantize op'),
+ ('fold_sparse_to_dense', 'fold SparseToDense op'),
+ ('forward_reshape_to_unaryop', 'Forward Reshape op'),
+ ('fuse_add_with_tconv', 'fuse Add op to Transposed'),
+ ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'),
+ ('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'),
+ ('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'),
+ ('fuse_bcq', 'apply Binary Coded Quantization'),
+ ('fuse_preactivation_batchnorm',
+ 'fuse BatchNorm operators of pre-activations to Convolution op'),
+ ('make_batchnorm_gamma_positive',
+ 'make negative gamma of BatchNorm to a small positive value (1e-10).'
+ ' Note that this pass can change the execution result of the model.'
+ ' So, use it only when the impact is known to be acceptable.'),
+ ('fuse_activation_function', 'fuse Activation function to a preceding operator'),
+ ('fuse_instnorm', 'fuse ops to InstanceNorm operator'),
+ ('replace_cw_mul_add_with_depthwise_conv',
+ 'replace channel-wise Mul/Add with DepthwiseConv2D'),
+ ('remove_redundant_reshape', 'fuse or remove subsequent Reshape ops'),
+ ('remove_redundant_transpose', 'fuse or remove subsequent Transpose ops'),
+ ('remove_unnecessary_reshape', 'remove unnecessary reshape ops'),
+ ('remove_unnecessary_slice', 'remove unnecessary slice ops'),
+ ('remove_unnecessary_strided_slice', 'remove unnecessary strided slice ops'),
+ ('remove_unnecessary_split', 'remove unnecessary split ops'),
+ ('resolve_customop_add', 'convert Custom(Add) op to Add op'),
+ ('resolve_customop_batchmatmul',
+ 'convert Custom(BatchMatmul) op to BatchMatmul op'),
+ ('resolve_customop_matmul', 'convert Custom(Matmul) op to Matmul op'),
+ ('shuffle_weight_to_16x1float32',
+ 'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.'
+ ' Note that it only converts weights whose row is a multiple of 16'),
+ ('substitute_pack_to_reshape', 'convert single input Pack op to Reshape op'),
+ ('substitute_squeeze_to_reshape', 'convert certain condition Squeeze to Reshape'),
+ ('substitute_transpose_to_reshape',
+ 'convert certain condition Transpose to Reshape'),
+ ('transform_min_max_to_relu6', 'transform Minimum-Maximum pattern to Relu6 op'))
+
+
+_CONSTANT = _CONSTANT()
+
+
def _add_default_arg(parser):
# version
parser.add_argument(
def _make_circle2circle_cmd(args, driver_path, input_path, output_path):
"""make a command for running circle2circle"""
cmd = [os.path.expanduser(c) for c in [driver_path, input_path, output_path]]
+ # profiling
+ if _is_valid_attr(args, 'generate_profile_data'):
+ cmd.append('--generate_profile_data')
# optimization pass
- if _is_valid_attr(args, 'all'):
- cmd.append('--all')
- if _is_valid_attr(args, 'fold_dequantize'):
- cmd.append('--fold_dequantize')
- if _is_valid_attr(args, 'fuse_add_with_tconv'):
- cmd.append('--fuse_add_with_tconv')
- if _is_valid_attr(args, 'fuse_batchnorm_with_tconv'):
- cmd.append('--fuse_batchnorm_with_tconv')
- if _is_valid_attr(args, 'fuse_bcq'):
- cmd.append('--fuse_bcq')
- if _is_valid_attr(args, 'fuse_instnorm'):
- cmd.append('--fuse_instnorm')
- if _is_valid_attr(args, 'resolve_customop_add'):
- cmd.append('--resolve_customop_add')
- if _is_valid_attr(args, 'resolve_customop_batchmatmul'):
- cmd.append('--resolve_customop_batchmatmul')
- if _is_valid_attr(args, 'resolve_customop_matmul'):
- cmd.append('--resolve_customop_matmul')
+ for opt in _CONSTANT.OPTIMIZATION_OPTS:
+ if _is_valid_attr(args, opt[0]):
+ cmd.append('--' + opt[0])
return cmd
--- /dev/null
+install(FILES validate_onnx2circle.py
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
--- /dev/null
+# validate-onnx2circle
+
+_validate-onnx2circle_ validates the onnx to optimized circle conversion by
+comparing the execution results of the original onnx model and the optimized circle model.
+
+This is currently in experimental state.
+
+## How to run the script
+
+Install `onnxruntime` inside the virtual environment
+```
+source install_path/bin/venv/bin/activate
+
+python -m pip --default-timeout=1000 --trusted-host pypi.org \
+ --trusted-host files.pythonhost.org install onnxruntime==1.6.0
+
+deactivate
+```
+
+Run the script
+```bash
+cd install_path/test
+
+driver='one/build/debug/compiler/luci-eval-driver/luci_eval_driver'
+onnx_filepath='path_to_onnx_model.onnx'
+circle_filepath='path_to_optimized_circle.circle'
+
+./validate_onnx2circle.py --driver ${driver} --onnx ${onnx_filepath} --circle ${circle_filepath}
+```
+
+The output will look something like this
+```
+Run ONNX...
+Run luci-interpreter...
+Compare 0 True
+```
--- /dev/null
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/../bin/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NOTE This is an experimental script to evaluate onnx-circle conversion
+# by running onnxruntime and luci-interpreter.
+# Plan is to run this regularly in CI
+
+import subprocess
+import argparse
+import numpy as np
+import torch
+import onnx
+import onnxruntime as ort
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--driver', type=str, required=True)
+parser.add_argument('--onnx', type=str, required=True)
+parser.add_argument('--circle', type=str, required=True)
+args = parser.parse_args()
+
+driver = args.driver
+onnx_filepath = args.onnx
+circle_filepath = args.circle
+
+
+def to_numpy(tensor):
+ return tensor.cpu().numpy()
+
+
+def to_nhwc(tensor):
+ if (tensor.ndim == 4):
+ return np.transpose(tensor, (0, 2, 3, 1))
+ return tensor
+
+
+class OnnxRunner:
+ def __init__(self, filepath):
+ self.filepath = filepath
+ self.session = None
+ self.inputs = None
+ self.inputs_size = None
+ self.inputs_data = None
+ self.outputs = None
+ self.outputs_size = None
+
+ def load(self):
+ model = onnx.load(self.filepath)
+ onnx.checker.check_model(model)
+ self.session = ort.InferenceSession(self.filepath)
+
+ def feed_random_inputs(self):
+ self.inputs = self.session.get_inputs()
+ self.inputs_size = len(self.inputs)
+ # reset input dictionary
+ self.inputs_data = {}
+ for in_idx in range(self.inputs_size):
+ input_shape = self.inputs[in_idx].shape
+ input_type = self.inputs[in_idx].type
+ if input_type == 'tensor(float)':
+ torch_type = torch.float32
+ else:
+ # TODO support other dtype
+ raise SystemExit("Unsupported input dtype")
+
+ x = torch.randn(input_shape, dtype=torch_type)
+ input_npa = to_numpy(x)
+ self.inputs_data.update({self.inputs[in_idx].name: input_npa})
+
+ # save NHWC form of input for luci-interpreter
+ input_npa_nhwc = to_nhwc(input_npa)
+ input_npa_nhwc.tofile(circle_filepath + ".input" + str(in_idx))
+
+ def run(self):
+ self.outs = self.session.run(None, self.inputs_data)
+
+ def get_outputs(self):
+ self.outputs = self.session.get_outputs()
+ self.outputs_size = len(self.outputs)
+
+
+# Run ONNX model
+print("Run ONNX...")
+onnx_runner = OnnxRunner(onnx_filepath)
+onnx_runner.load()
+onnx_runner.feed_random_inputs()
+onnx_runner.run()
+onnx_runner.get_outputs()
+
+# Execute luci interpreter
+print("Run luci-interpreter...")
+process = subprocess.run(
+ [
+ driver, circle_filepath,
+ str(onnx_runner.inputs_size), circle_filepath + ".input",
+ circle_filepath + ".output"
+ ],
+ check=True)
+
+# Compare results
+rtolerance = 1e-03
+atolerance = 1e-04
+result_compare = True
+for idx in range(onnx_runner.outputs_size):
+ output_shape = onnx_runner.outputs[idx].shape
+ output_type = onnx_runner.outputs[idx].type
+ if output_type == 'tensor(float)':
+ output_np_type = np.float32
+ else:
+ # TODO support other dtype
+ raise SystemExit("Unsupported output dtype")
+
+ # output of luci-interpreter
+ output_data = np.fromfile(circle_filepath + ".output" + str(idx), output_np_type)
+ shape_file = open(circle_filepath + ".output" + str(idx) + ".shape", 'r')
+ output_shape = [int(i) for i in shape_file.read().split(',')]
+ luci_output_data = np.reshape(output_data, output_shape)
+
+ # output of onnx runtime
+ output_nchw = onnx_runner.outs[idx]
+ output_nhwc = to_nhwc(output_nchw)
+
+    # diff is a boolean tensor marking, per element, whether the value is within tolerance
+ diff = np.isclose(output_nhwc, luci_output_data, rtol=rtolerance, atol=atolerance)
+    # reduce to a single boolean: True only if every element is within tolerance
+ result_compare_one = np.all(diff)
+ print("Compare", idx, result_compare_one)
+ if (not result_compare_one):
+ diff_val = np.subtract(output_nhwc, luci_output_data)
+ print("ONNX Result", output_nhwc)
+ print("Diff", diff_val)
+ print("Diff Max", np.ndarray.max(diff_val))
+
+ result_compare = result_compare and result_compare_one
+
+if (not result_compare):
+ exit(-1)
+
+exit(0)
target_include_directories(moco_onnx_frontend PUBLIC include)
target_link_libraries(moco_onnx_frontend PUBLIC moco_onnx_proto)
target_link_libraries(moco_onnx_frontend PUBLIC loco)
-target_link_libraries(moco_onnx_frontend PRIVATE stdex)
target_link_libraries(moco_onnx_frontend PRIVATE cwrap)
nnas_find_package(GTest QUIET)
-require("stdex")
require("loco")
require("cwrap")
// TODO Make comments clear
void convert_graph(::onnx::ModelProto &onnx_model_proto, loco::Graph *graph)
{
- auto nodes = stdex::make_unique<moco::onnx::SymbolTable>();
- auto input_names = stdex::make_unique<moco::onnx::SymbolTable>();
+ auto nodes = std::make_unique<moco::onnx::SymbolTable>();
+ auto input_names = std::make_unique<moco::onnx::SymbolTable>();
moco::onnx::GraphBuilderContext gb_context(graph, nodes.get(), input_names.get());
{
/**
-* @brief Parent class of onnx operation graph builders
-* @note GraphBuilder call proper build and validate function according to opset version
-*/
+ * @brief Parent class of onnx operation graph builders
+ * @note GraphBuilder call proper build and validate function according to opset version
+ */
class GraphBuilder
{
public:
};
/**
-* @brief Class to store context to build IR from onnx
-*/
+ * @brief Class to store context to build IR from onnx
+ */
class GraphBuilderContext
{
public:
GraphBuilderContext(loco::Graph *g, SymbolTable *nodes, SymbolTable *input_names)
- : _g(g), _nodes(nodes), _input_names(input_names)
+ : _g(g), _nodes(nodes), _input_names(input_names)
{
// DO NOTHING
}
{
/**
-* @brief Class to return graph builder for passed onnx Operator
-*/
+ * @brief Class to return graph builder for passed onnx Operator
+ */
class GraphBuilderRegistry
{
public:
/**
- * @brief Returns registered GraphBuilder pointer for operator or
- * nullptr if not registered
- */
+ * @brief Returns registered GraphBuilder pointer for operator or
+ * nullptr if not registered
+ */
const GraphBuilder *lookup(const std::string &op) const
{
if (_builder_map.find(op) == _builder_map.end())
} // namespace onnx
} // namespace moco
-#include <stdex/Memory.h>
+#include <memory>
-#define REGISTER_OP_BUILDER(NAME, BUILDER) \
- namespace \
- { \
- __attribute__((constructor)) void reg_op(void) \
- { \
- std::unique_ptr<moco::onnx::BUILDER> builder = stdex::make_unique<moco::onnx::BUILDER>(); \
- moco::onnx::GraphBuilderRegistry::get().add(#NAME, std::move(builder)); \
- } \
+#define REGISTER_OP_BUILDER(NAME, BUILDER) \
+ namespace \
+ { \
+ __attribute__((constructor)) void reg_op(void) \
+ { \
+ std::unique_ptr<moco::onnx::BUILDER> builder = std::make_unique<moco::onnx::BUILDER>(); \
+ moco::onnx::GraphBuilderRegistry::get().add(#NAME, std::move(builder)); \
+ } \
}
#endif // __MOCO_FRONTEND_ONNX_GRAPH_BUILDER_REGISTRY_H__
{
/**
- * @brief GraphBuilder for Constant(since version 1) node
- */
+ * @brief GraphBuilder for Constant(since version 1) node
+ */
class Constant_V1
{
public:
};
/**
- * @brief GraphBuilder for Constant(since version 9) node
- * @note Until version 1, only FLOAT16, FLOAT, DOUBLE was supported
- * Since version 9, all types are supported
- */
+ * @brief GraphBuilder for Constant(since version 9) node
+ * @note Until version 1, only FLOAT16, FLOAT, DOUBLE was supported
+ * Since version 9, all types are supported
+ */
class Constant_V9
{
public:
};
/**
- * @brief GraphBuilder for Constant node
- */
+ * @brief GraphBuilder for Constant node
+ */
class ConstantGraphBuilder : public GraphBuilder
{
public:
{
/**
- * @brief GraphBuilder for Identity(since version 1) node
- */
+ * @brief GraphBuilder for Identity(since version 1) node
+ */
class Identity_V1
{
public:
};
/**
- * @brief GraphBuilder for Identity node
- */
+ * @brief GraphBuilder for Identity node
+ */
class IdentityGraphBuilder : public GraphBuilder
{
public:
target_link_libraries(onnx2circle PRIVATE exo)
target_link_libraries(onnx2circle PRIVATE locop)
target_link_libraries(onnx2circle PRIVATE hermes_std)
-target_link_libraries(onnx2circle PRIVATE stdex)
target_link_libraries(onnx2circle PRIVATE angkor cwrap)
target_link_libraries(onnx2circle PRIVATE mir2loco)
target_link_libraries(onnx2circle PRIVATE mir_onnx_importer)
-require("stdex")
require("hermes-std")
require("mir2loco")
require("mir")
#include "hermes/ConsoleReporter.h"
#include "hermes/EnvConfig.h"
-#include "stdex/Memory.h"
-
#include <cassert>
-
+#include <memory>
#include <iostream>
#include <stdexcept>
#include <string>
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<EnvConfig>("ONNX2CIRCLE_Log"));
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<EnvConfig>("ONNX2CIRCLE_Log"));
}
return ctx;
using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
// This line allows users to control all the exo-circle loggers via ONNX2CIRCLE_Log_Backend
- exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("ONNX2CIRCLE_Log_Backend"));
+ exo::LoggingContext::get()->config(std::make_unique<EnvConfig>("ONNX2CIRCLE_Log_Backend"));
LOGGER(l);
target_link_libraries(onnxkitproto PUBLIC libprotobuf)
add_executable(onnxkit ${SOURCES})
-target_link_libraries(onnxkit PRIVATE stdex)
target_link_libraries(onnxkit PRIVATE cli)
target_link_libraries(onnxkit PRIVATE onnxkitproto)
target_link_libraries(onnxkit PRIVATE nncc_common)
- onnx
- Protobuf
- cli
-- stdex
#include "DecodeCommand.hpp"
#include <cli/App.h>
-#include <stdex/Memory.h>
+
+#include <memory>
int main(int argc, char **argv)
{
cli::App app{argv[0]};
- app.insert("encode", stdex::make_unique<EncodeCommand>());
- app.insert("decode", stdex::make_unique<DecodeCommand>());
+ app.insert("encode", std::make_unique<EncodeCommand>());
+ app.insert("decode", std::make_unique<DecodeCommand>());
return app.run(argc - 1, argv + 1);
}
#include "Support.hpp"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <fstream>
#include <stdexcept>
return nullptr;
}
- auto stream = stdex::make_unique<T>(path.c_str(), mode);
+ auto stream = std::make_unique<T>(path.c_str(), mode);
if (!stream->is_open())
{
throw std::runtime_error{"ERROR: Failed to open " + path};
std::unique_ptr<UI> make_ui(const Cmdline &cmdargs)
{
- auto iocfg = stdex::make_unique<UI>();
+ auto iocfg = std::make_unique<UI>();
auto in = open_fstream<std::ifstream>(cmdargs.get_or(0, "-"), std::ios::in | std::ios::binary);
iocfg->in(std::move(in));
add_library(oops INTERFACE)
target_include_directories(oops INTERFACE include)
target_link_libraries(oops INTERFACE pepper_str)
+target_link_libraries(oops INTERFACE nncc_coverage)
if(NOT ENABLE_TEST)
return()
nnas_find_package(GTest REQUIRED)
-GTest_AddTest(oops_test test.cpp)
+GTest_AddTest(oops_test src/oops.test.cpp)
target_link_libraries(oops_test oops)
{
public:
InternalExn(const char *filename, const int line, const std::string &msg)
- : _filename(filename), _line(to_uint32(line)), _msg(msg)
+ : _filename(filename), _line(to_uint32(line)), _msg(msg)
{
construct_full_msg();
}
explicit InternalExn(const char *filename, const int line, const std::string &msg, uint32_t val)
- : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + std::to_string(val))
+ : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + std::to_string(val))
{
construct_full_msg();
}
explicit InternalExn(const char *filename, const int line, const std::string &msg,
const std::string &val)
- : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + val)
+ : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + val)
{
construct_full_msg();
}
void construct_full_msg()
{
_full_msg =
- "Internal Exception. " + _msg + " [" + _filename + ":" + std::to_string(_line) + "]";
+ "Internal Exception. " + _msg + " [" + _filename + ":" + std::to_string(_line) + "]";
}
std::string _full_msg;
out << pepper::str(attr, " = ", val);
}
- void build_info(std::stringstream &) { /* empty */}
+ void build_info(std::stringstream &)
+ { /* empty */
+ }
// when only one info of string is provided
void build_info(std::stringstream &out, const std::string &val) { out << val; }
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "oops/InternalExn.h"
+#include "oops/UserExn.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+void batman() { INTERNAL_EXN("Here comes Joker"); }
+
+void star_wars() { INTERNAL_EXN_V("Something is approaching", "Darth Vader"); }
+
+enum class InfinityStones
+{
+ SpaceStone,
+ RealityStone,
+ OtherStones,
+};
+
+void avengers()
+{
+ std::string where;
+ std::string separator = ":";
+ try
+ {
+ // exception will be raised in next line
+ where = __FILE__ + separator + std::to_string(__LINE__ + 1);
+ INTERNAL_EXN_V("Last stone was gathered", oops::to_uint32(InfinityStones::SpaceStone));
+ }
+ catch (const oops::InternalExn &e)
+ {
+ auto msg = std::string(e.what());
+ ASSERT_TRUE(msg.find("Last stone was gathered: 0") != std::string::npos);
+ ASSERT_TRUE(msg.find(where) != std::string::npos);
+ }
+}
+
+} // namespace
+
+TEST(oopsTest, InternalExn)
+{
+ ASSERT_THROW(batman(), oops::InternalExn);
+ ASSERT_THROW(star_wars(), oops::InternalExn);
+
+ avengers();
+}
+
+TEST(oopsTest, UserExn_one_info_after_msg)
+{
+ try
+ {
+ throw oops::UserExn("Not a member of Avenger", "Kingsman");
+ }
+ catch (const oops::UserExn &e)
+ {
+ auto msg = std::string(e.what());
+ ASSERT_TRUE(msg.find("Not a member of Avenger: Kingsman") != std::string::npos);
+ }
+}
+
+TEST(oopsTest, UserExn_two_pairs_after_msg)
+{
+ try
+ {
+ std::string hero("Spiderman");
+
+ // clang-format off
+ throw oops::UserExn("Hero's age is wrong",
+ "Hero", hero,
+ "Age", 97);
+ // clang-format on
+ }
+ catch (const oops::UserExn &e)
+ {
+ auto msg = std::string(e.what());
+ ASSERT_TRUE(msg.find("Hero = Spiderman, Age = 97") != std::string::npos);
+ }
+}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "oops/InternalExn.h"
-#include "oops/UserExn.h"
-
-#include <gtest/gtest.h>
-
-namespace
-{
-
-void batman() { INTERNAL_EXN("Here comes Joker"); }
-
-void star_wars() { INTERNAL_EXN_V("Something is approaching", "Darth Vader"); }
-
-enum class InfinityStones
-{
- SpaceStone,
- RealityStone,
- OtherStones,
-};
-
-void avengers()
-{
- std::string where;
- std::string separator = ":";
- try
- {
- // exception will be raised in next line
- where = __FILE__ + separator + std::to_string(__LINE__ + 1);
- INTERNAL_EXN_V("Last stone was gathered", oops::to_uint32(InfinityStones::SpaceStone));
- }
- catch (const oops::InternalExn &e)
- {
- auto msg = std::string(e.what());
- ASSERT_TRUE(msg.find("Last stone was gathered: 0") != std::string::npos);
- ASSERT_TRUE(msg.find(where) != std::string::npos);
- }
-}
-
-} // namespace
-
-TEST(oopsTest, InternalExn)
-{
- ASSERT_THROW(batman(), oops::InternalExn);
- ASSERT_THROW(star_wars(), oops::InternalExn);
-
- avengers();
-}
-
-TEST(oopsTest, UserExn_one_info_after_msg)
-{
- try
- {
- throw oops::UserExn("Not a member of Avenger", "Kingsman");
- }
- catch (const oops::UserExn &e)
- {
- auto msg = std::string(e.what());
- ASSERT_TRUE(msg.find("Not a member of Avenger: Kingsman") != std::string::npos);
- }
-}
-
-TEST(oopsTest, UserExn_two_pairs_after_msg)
-{
- try
- {
- std::string hero("Spiderman");
-
- // clang-format off
- throw oops::UserExn("Hero's age is wrong",
- "Hero", hero,
- "Age", 97);
- // clang-format on
- }
- catch (const oops::UserExn &e)
- {
- auto msg = std::string(e.what());
- ASSERT_TRUE(msg.find("Hero = Spiderman, Age = 97") != std::string::npos);
- }
-}
add_library(pepper_str INTERFACE)
target_include_directories(pepper_str INTERFACE include)
+target_link_libraries(pepper_str INTERFACE nncc_coverage)
if(NOT ENABLE_TEST)
return()
# Google Test is mandatory for test
nnas_find_package(GTest REQUIRED)
-GTest_AddTest(pepper_str_test test.cpp)
+GTest_AddTest(pepper_str_test src/pepper-str.test.cpp)
target_link_libraries(pepper_str_test pepper_str)
str_impl(os, std::forward<Args>(args)...);
}
-} // namesapce details
+} // namespace details
} // namespace pepper
namespace pepper
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pepper/str.h"
+
+#include <iostream>
+
+#include <gtest/gtest.h>
+
+TEST(StrTests, README)
+{
+ // Let us check whether the example in README.md works!
+ int argc = 4;
+
+ std::cout << pepper::str("There are ", argc, " arguments") << std::endl;
+
+ SUCCEED();
+}
+
+TEST(StrTests, Empty)
+{
+ // pepper::str() returns an empty string
+ ASSERT_EQ(pepper::str(), "");
+}
+
+TEST(StrTests, Single_Int)
+{
+ // Convert a single "int" value as a string
+ ASSERT_EQ(pepper::str(3), "3");
+}
+
+TEST(StrTests, Concat_000)
+{
+ const int n = 3;
+ const int m = 4;
+
+ ASSERT_EQ(pepper::str(n, "+", m, "=", n + m), "3+4=7");
+}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "pepper/str.h"
-
-#include <iostream>
-
-#include <gtest/gtest.h>
-
-TEST(StrTests, README)
-{
- // Let us check whether the example in README.md works!
- int argc = 4;
-
- std::cout << pepper::str("There are ", argc, " arguments") << std::endl;
-
- SUCCEED();
-}
-
-TEST(StrTests, Empty)
-{
- // pepper::str() returns an empty string
- ASSERT_EQ(pepper::str(), "");
-}
-
-TEST(StrTests, Single_Int)
-{
- // Convert a single "int" value as a string
- ASSERT_EQ(pepper::str(3), "3");
-}
-
-TEST(StrTests, Concat_000)
-{
- const int n = 3;
- const int m = 4;
-
- ASSERT_EQ(pepper::str(n, "+", m, "=", n + m), "3+4=7");
-}
struct imemstream : virtual membuf, std::istream
{
imemstream(char const *base, size_t size)
- : membuf(base, size), std::istream(static_cast<std::streambuf *>(this))
+ : membuf(base, size), std::istream(static_cast<std::streambuf *>(this))
{
}
};
for key in json_load:
if key == "weights":
expected_weights = np.array(json_load["weights"])
- input_weights = tensor["weights"][:]
+ input_weights = tensor["weights"][()]
abs_tolerance = 1
# We use higher tolerance for int64 data (bias of int16-quantized model)
if tensor["weights"].dtype == 'int64':
--- /dev/null
+{
+ "scale": 0.00014983004075475037,
+ "zero_point": 0.0
+}
--- /dev/null
+{
+ "scale": 0.00014586378529202193,
+ "zero_point": 0.0
+}
--- /dev/null
+{
+ "scale": 0.00014956798986531794,
+ "zero_point": 0.0
+}
--- /dev/null
+{
+ "weights": [
+ 0
+ ]
+}
--- /dev/null
+{
+ "min": -4.909480743408203,
+ "max": 4.779518718719482
+}
--- /dev/null
+{
+ "min": -4.073143873214722,
+ "max": 4.779518718719482
+}
--- /dev/null
+{
+ "min": -4.9008944129943846,
+ "max": 4.620573101043701
+}
--- /dev/null
+{
+ "scale": 0.038689617067575455,
+ "zero_point": 128.0
+}
--- /dev/null
+{
+ "scale": 0.035256847739219666,
+ "zero_point": 123.0
+}
--- /dev/null
+{
+ "scale": 0.0385618582367897,
+ "zero_point": 129.0
+}
--- /dev/null
+{
+ "weights": [
+ 0
+ ]
+}
--- /dev/null
+{
+ "min": -4.959668273925781,
+ "max": 4.906183891296386
+}
--- /dev/null
+{
+ "min": -4.3535110282897955,
+ "max": 4.636985759735107
+}
--- /dev/null
+{
+ "min": -4.959668273925781,
+ "max": 4.8736056804656975
+}
addTest(PRelu_001 channel int16)
addTest(ReLU_000 layer uint8)
addTest(ReLU_000 channel int16)
+addTest(Split_000 channel uint8)
+addTest(Split_000 channel int16)
addTest(TransposeConv_001 channel uint8)
addTest(TransposeConv_001 channel int16)
addTest(TransposeConv_001 layer uint8)
--- /dev/null
+ 3.241328 , 2.7033713 ,-2.5329788 ,-4.078369 ,-3.6711028 , 2.8912613 , 0.6188993 , 3.3729403 , 2.9906578 , 0.69040877, 0.6443222 , 1.1676162
--- /dev/null
+ 1.572614 , 3.6147017 , 1.4378501 ,-0.81497866, 1.5987366 , 3.7698908 ,-3.8637109 , 4.5728784 ,-0.8706349 , 0.7389268 , 4.64117 ,-0.96047217
--- /dev/null
+ 0.00864919,-3.1653113 ,-2.125551 , 2.9225516 ,-1.1439148 , 4.6509814 ,-2.097259 , 2.5843353 ,-2.067207 ,-2.5034845 ,-4.9441104 ,-3.9062042
--- /dev/null
+ 1.0920542 , 0.5510192 , 1.3465579 ,-2.3510268 , 4.016736 , 4.7848744 ,-0.42403316, 0.00571597, 1.6412207 , 1.7787368 , 2.4728034 ,-3.5900247
--- /dev/null
+-2.9799085,-3.9477375, 0.6402844, 3.304766 , 3.8880465,-3.5069442,-2.3702915, 4.126247 ,-3.1614416, 2.9909244,-2.8755414, 0.2627986
--- /dev/null
+-2.327701 , 1.9312059 ,-2.0069487 ,-1.2584914 ,-0.08435626, 0.47685367,-2.7456024 , 2.1275337 ,-4.9685698 , 1.8143541 , 0.52829266,-2.770121
--- /dev/null
+ 0.01133719,-3.3741624 , 3.556686 ,-4.21059 , 0.49977505, 1.768375 , 3.867543 , 2.270572 ,-3.9507272 ,-4.595618 ,-4.7460327 , 0.5856542
--- /dev/null
+-2.7181 , 4.6819983 , 2.9022477 ,-0.10716935, 3.6687856 ,-2.5403244 ,-4.477037 , 2.5499978 ,-3.9294813 , 0.08725335,-2.243345 ,-1.4018577
--- /dev/null
+-3.920553 , 0.87464577,-1.0319884 , 2.1885726 , 2.755115 ,-1.6436632 ,-4.4507327 , 4.915525 , 2.9331517 , 4.7712016 , 4.676084 ,-1.7715888
--- /dev/null
+-2.181168 ,-1.6011912 ,-4.359466 ,-1.3662407 ,-0.06876431,-2.9213328 ,-0.5463467 ,-3.7916536 ,-3.751455 ,-2.822578 , 0.8914152 ,-3.0267959
set_target_properties(pp PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(pp PUBLIC include)
target_link_libraries(pp PRIVATE nncc_common)
+target_link_libraries(pp PUBLIC nncc_coverage)
if(NOT ENABLE_TEST)
return()
for j in range(len(input_details)):
input_detail = input_details[j]
- # Generate random input [-5, 5)
- input_data = np.array(10 * np.random.random_sample(input_detail["shape"]) - 5,
- input_detail["dtype"])
+ print(input_detail["dtype"])
+ if input_detail["dtype"] == np.bool_:
+ # Generate random bool [0, 1]
+ input_data = np.array(
+ np.random.random_integers(0, 1, input_detail["shape"]),
+ input_detail["dtype"])
+        elif input_detail["dtype"] == np.float32:
+            # Generate random input [-5, 5)
+            input_data = np.array(10 * np.random.random_sample(input_detail["shape"]) - 5,
+                                  input_detail["dtype"])
+        else:
+            # Guard against silently reusing stale data for unsupported dtypes
+            raise SystemExit("Unsupported input dtype: " + str(input_detail["dtype"]))
sample.create_dataset(str(j), data=input_data)
h5_file.close()
--input_data "${BIN_PATH}/${TESTCASE}.tflite.input.h5" \
--output_model "${BIN_PATH}/${TESTCASE}.out.circle"
+ if [[ $? -ne 0 ]]; then
+ echo "FAILED TO GENERATE CIRCLE OUTPUT"
+ continue
+ fi
+
+ # Run record-minmax with auto generated random input
+ "${RECORD_MINMAX_PATH}" \
+ --input_model "${TESTCASE_FILE}.circle" \
+ --output_model "${BIN_PATH}/${TESTCASE}.outr.circle"
+
if [[ $? -eq 0 ]]; then
touch "${PASSED_TAG}"
fi
target_link_libraries(record-minmax arser)
target_link_libraries(record-minmax safemain)
target_link_libraries(record-minmax luci_import)
+target_link_libraries(record-minmax luci_env)
target_link_libraries(record-minmax luci_export)
target_link_libraries(record-minmax luci_interpreter)
target_link_libraries(record-minmax vconone)
+target_link_libraries(record-minmax nncc_coverage)
install(TARGETS record-minmax DESTINATION bin)
return()
endif(NOT ENABLE_TEST)
+file(GLOB_RECURSE TESTS "tests/*.test.cpp")
+
nnas_find_package(GTest REQUIRED)
-GTest_AddTest(record_minmax_function_test "${CMAKE_CURRENT_SOURCE_DIR}/tests/RecordFunction.test.cpp")
+GTest_AddTest(record_minmax_function_test "${TESTS}")
target_include_directories(record_minmax_function_test PRIVATE include)
+target_link_libraries(record_minmax_function_test nncc_coverage)
#include <arser/arser.h>
#include <vconone/vconone.h>
+#include <luci/UserSettings.h>
+
void print_version(void)
{
std::cout << "record-minmax version " << vconone::get_string() << std::endl;
using namespace record_minmax;
arser::Arser arser(
- "Embedding min/max values of activations to the circle model for post-training quantization");
+ "Embedding min/max values of activations to the circle model for post-training quantization");
arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
arser.add_argument("--input_model")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Input model filepath");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(true)
+ .help("Input model filepath");
arser.add_argument("--input_data")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Input data filepath");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Input data filepath. If not given, record-minmax will run with randomly generated data. "
+ "Note that the random dataset does not represent inference workload, leading to poor "
+ "model accuracy.");
arser.add_argument("--output_model")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Output model filepath");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(true)
+ .help("Output model filepath");
arser.add_argument("--min_percentile")
- .nargs(1)
- .type(arser::DataType::FLOAT)
- .help("Record n'th percentile of min");
+ .nargs(1)
+ .type(arser::DataType::FLOAT)
+ .help("Record n'th percentile of min");
arser.add_argument("--max_percentile")
- .nargs(1)
- .type(arser::DataType::FLOAT)
- .help("Record n'th percentile of max");
+ .nargs(1)
+ .type(arser::DataType::FLOAT)
+ .help("Record n'th percentile of max");
arser.add_argument("--mode")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Record mode. percentile (default) or moving_average");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Record mode. percentile (default) or moving_average");
+
+ arser.add_argument("--generate_profile_data")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will turn on profiling data generation.");
try
{
return 255;
}
+ auto settings = luci::UserSettings::settings();
+
auto input_model_path = arser.get<std::string>("--input_model");
- auto input_data_path = arser.get<std::string>("--input_data");
auto output_model_path = arser.get<std::string>("--output_model");
// Default values
if (mode != "percentile" && mode != "moving_average")
throw std::runtime_error("Unsupported mode");
+ if (arser["--generate_profile_data"])
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, true);
+
RecordMinMax rmm;
// Initialize interpreter and observer
rmm.initialize(input_model_path);
- // Profile min/max while executing the given input data
- rmm.profileData(mode, input_data_path, min_percentile, max_percentile);
+ if (arser["--input_data"])
+ {
+ auto input_data_path = arser.get<std::string>("--input_data");
+
+ // Profile min/max while executing the given input data
+ rmm.profileData(mode, input_data_path, min_percentile, max_percentile);
+ }
+ else
+ {
+ // Profile min/max while executing random input data
+ rmm.profileDataWithRandomInputs(mode, min_percentile, max_percentile);
+ }
// Save profiled values to the model
rmm.saveModel(output_model_path);
float percent_i = static_cast<float>(index) / static_cast<float>(copy.size() - 1);
float fraction =
- (percentile / 100.0 - percent_i) / ((index + 1.0) / (copy.size() - 1.0) - percent_i);
+ (percentile / 100.0 - percent_i) / ((index + 1.0) / (copy.size() - 1.0) - percent_i);
float res = copy[index] + fraction * (copy[index + 1] - copy[index]);
return res;
}
void profileData(const std::string &mode, const std::string &input_data_path,
float min_percentile, float max_percentile);
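+  // Profile min/max while executing randomly generated inputs (when no input data is given)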
+ void profileDataWithRandomInputs(const std::string &mode, float min_percentile,
+ float max_percentile);
+
void saveModel(const std::string &output_model_path);
private:
require("luci")
+require("luci-interpreter")
require("safemain")
require("arser")
require("vconone")
{
return DataType::S64;
}
- // Only support three datatypes for now
+ if (h5_type.getClass() == H5T_class_t::H5T_ENUM)
+ {
+ // We follow the numpy format
+ // In numpy 1.19.0, np.bool_ is saved as H5T_ENUM
+ // - (name, value) -> (FALSE, 0) and (TRUE, 1)
+ // - value dtype is H5T_STD_I8LE
+ // TODO Find a general way to recognize BOOL type
+ char name[10];
+ int8_t value[2] = {0, 1};
+ if (H5Tenum_nameof(h5_type.getId(), value, name, 10) < 0)
+ return DataType::Unknown;
+
+ if (std::string(name) != "FALSE")
+ return DataType::Unknown;
+
+ if (H5Tenum_nameof(h5_type.getId(), value + 1, name, 10) < 0)
+ return DataType::Unknown;
+
+ if (std::string(name) != "TRUE")
+ return DataType::Unknown;
+
+ return DataType::BOOL;
+ }
+ // TODO Support more datatypes
return DataType::Unknown;
}
case DataType::S64:
readTensorData(tensor, static_cast<int64_t *>(buffer));
break;
+ case DataType::BOOL:
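+      // BOOL is stored as one byte per element (see H5T_ENUM above), so read via uint8_t buffer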
+ readTensorData(tensor, static_cast<uint8_t *>(buffer));
+ break;
default:
throw std::runtime_error{"Unsupported data type for input data (.h5)"};
}
#include <luci/IR/CircleOpcode.h>
+#include <math.h>
+
using DataType = luci_interpreter::DataType;
namespace record_minmax
return;
}
+ if (node->dtype() == DataType::BOOL)
+ {
+ // Bool type tensor is not quantized
+ return;
+ }
+
// Only support recording of float32 values
if (tensor->element_type() != DataType::FLOAT32)
throw std::runtime_error("Tensor's data type is not float");
const auto num_elements = tensor->shape().num_elements();
std::vector<float> buf(data, data + num_elements);
- auto minmax = std::minmax_element(buf.begin(), buf.end());
- float min = *minmax.first;
- float max = *minmax.second;
+
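+  // Initialize to opposite extremes so that any non-NaN element updates both min and max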
+ float max = std::numeric_limits<float>::lowest();
+ float min = std::numeric_limits<float>::max();
+
+ bool all_nan = true;
+ for (auto number : buf)
+ {
+ if (isnan(number))
+ continue;
+
+ all_nan = false;
+
+ if (number > max)
+ max = number;
+
+ if (number < min)
+ min = number;
+ }
+
+ if (all_nan)
+ throw std::runtime_error("All values are NaN(Not a Number)");
_minmax_data.recordMinMax(node, min, max);
}
#include <numeric>
#include <stdexcept>
#include <iostream>
+#include <random>
using Shape = luci_interpreter::Shape;
using DataType = luci_interpreter::DataType;
namespace
{
+std::vector<uint8_t> genRandomBoolData(std::mt19937 &gen, uint32_t num_elements)
+{
+ std::uniform_int_distribution<> dist(0, 1);
+ std::vector<uint8_t> input_data(num_elements);
+
+ // Write random data
+ for (auto &iter : input_data)
+ iter = static_cast<uint8_t>(dist(gen));
+
+ return input_data;
+}
+
/**
* @brief getTensorSize will return size in bytes
*/
}
}
+void update_quantparam(record_minmax::MinMaxObserver *observer, const std::string &mode,
+ float min_percentile, float max_percentile)
+{
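+  // Reduce each node's min/max records to one (min, max) pair and attach it as CircleQuantParam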
+ auto minmax_map = observer->minMaxData()->getMap();
+ for (auto iter = minmax_map->begin(); iter != minmax_map->end(); ++iter)
+ {
+ auto node = iter->first;
+ auto minmax = iter->second;
+
+ float min{0.0f}, max{0.0f};
+ if (mode == "percentile")
+ {
+ min = record_minmax::getNthPercentile(minmax.min_vector, min_percentile);
+ max = record_minmax::getNthPercentile(minmax.max_vector, max_percentile);
+ }
+ else if (mode == "moving_average")
+ {
+ min = record_minmax::getMovingAverage(minmax.min_vector, 0.9, 16, true);
+ max = record_minmax::getMovingAverage(minmax.max_vector, 0.9, 16, false);
+ }
+ assert(mode == "percentile" || mode == "moving_average");
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ quantparam->min.push_back(min);
+ quantparam->max.push_back(max);
+
+ assert(node->quantparam() == nullptr);
+
+ auto mutable_node = const_cast<luci::CircleNode *>(node);
+ mutable_node->quantparam(std::move(quantparam));
+ }
+}
+
} // namespace
namespace record_minmax
throw std::runtime_error("HDF5 error occurred.");
}
- auto minmax_map = _observer->minMaxData()->getMap();
- for (auto iter = minmax_map->begin(); iter != minmax_map->end(); ++iter)
+ update_quantparam(_observer.get(), mode, min_percentile, max_percentile);
+}
+
+void RecordMinMax::profileDataWithRandomInputs(const std::string &mode, float min_percentile,
+ float max_percentile)
+{
+ // We use three randomly-generated records
+ const uint32_t num_records = 3;
+
+ const auto input_nodes = loco::input_nodes(_module->graph());
+ const auto num_inputs = input_nodes.size();
+
+ std::random_device rd;
+ std::mt19937 gen(rd());
+ std::uniform_real_distribution<> dist(-5, 5);
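+  // Float inputs are drawn uniformly from [-5, 5); bool inputs are generated separately below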
+
+ for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
{
- auto node = iter->first;
- auto minmax = iter->second;
+ std::cout << "Recording " << record_idx << "'th data" << std::endl;
- float min{0.0f}, max{0.0f};
- if (mode == "percentile")
+ for (int32_t input_idx = 0; input_idx < num_inputs; input_idx++)
{
- min = getNthPercentile(minmax.min_vector, min_percentile);
- max = getNthPercentile(minmax.max_vector, max_percentile);
- }
- else if (mode == "moving_average")
- {
- min = getMovingAverage(minmax.min_vector, 0.9, 16, true);
- max = getMovingAverage(minmax.max_vector, 0.9, 16, false);
- }
- assert(mode == "percentile" || mode == "moving_average");
- auto quantparam = std::make_unique<luci::CircleQuantParam>();
- quantparam->min.push_back(min);
- quantparam->max.push_back(max);
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+ assert(input_node->index() == input_idx);
+ uint32_t num_elements = 1;
+ for (uint32_t i = 0; i < input_node->rank(); i++)
+ {
+ if (!input_node->dim(i).known())
+ throw std::runtime_error("Input dimension must be known");
- assert(node->quantparam() == nullptr);
+ num_elements *= input_node->dim(i).value();
+ }
- auto mutable_node = const_cast<luci::CircleNode *>(node);
- mutable_node->quantparam(std::move(quantparam));
+ if (num_elements == 0)
+ throw std::runtime_error("Only support non-zero sized inputs");
+
+ // TODO Support more input data types
+ assert(input_node->dtype() == loco::DataType::FLOAT32 ||
+ input_node->dtype() == loco::DataType::BOOL);
+
+ if (input_node->dtype() == DataType::FLOAT32)
+ // clang-format off
+ {
+ std::vector<float> input_data(num_elements);
+
+ // Write random data
+ for (auto &iter : input_data)
+ iter = static_cast<float>(dist(gen));
+
+      // TODO: Input data is copied twice (generator -> buffer (input_data) -> interpreter inputs)
+      // We can reduce the copy by writing the random data directly to the interpreter inputs
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(float));
+ }
+ // clang-format on
+ else if (input_node->dtype() == DataType::BOOL)
+ {
+ auto input_data = genRandomBoolData(gen, num_elements);
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(uint8_t));
+ }
+ }
+
+ _interpreter->interpret();
}
+
+ std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
+
+ update_quantparam(_observer.get(), mode, min_percentile, max_percentile);
}
void RecordMinMax::saveModel(const std::string &output_model_path)
SUCCEED();
}
+TEST(GetMovingAverageTest, Simple)
+{
+ std::vector<float> input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+ EXPECT_NE(0, getMovingAverage(input, 0.5, 4, true));
+ EXPECT_NE(0, getMovingAverage(input, 0.5, 4, false));
+}
+
} // namespace record_minmax
float _stddev;
};
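+// Generates int16 samples from a normal distribution N(mean, stddev), clamped to the int16 range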
+class GaussianInt16DataChef final : public DataChef
+{
+public:
+ GaussianInt16DataChef(float mean, float stddev) : _mean{mean}, _stddev{stddev}
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<uint8_t> generate(int32_t count) const override;
+
+private:
+ float _mean;
+ float _stddev;
+};
+
class GaussianUint8DataChef final : public DataChef
{
public:
std::unique_ptr<DataChef> create(const Arguments &args) const;
};
+struct GaussianInt16DataChefFactory : public DataChefFactory
+{
+ std::unique_ptr<DataChef> create(const Arguments &args) const;
+};
+
struct GaussianUint8DataChefFactory : public DataChefFactory
{
std::unique_ptr<DataChef> create(const Arguments &args) const;
DATA_CHEF(FLOAT32, constant, ConstantDataChefFactory<float>)
DATA_CHEF(BOOL, constant, ConstantDataChefFactory<bool>)
DATA_CHEF(UINT8, constant, ConstantDataChefFactory<uint8_t>)
+DATA_CHEF(INT16, constant, ConstantDataChefFactory<int16_t>)
DATA_CHEF(INT32, constant, ConstantDataChefFactory<int32_t>)
DATA_CHEF(INT64, constant, ConstantDataChefFactory<int64_t>)
DATA_CHEF(INT64, explicit, ExplicitDataChefFactory<int64_t>)
DATA_CHEF(INT32, explicit, ExplicitDataChefFactory<int32_t>)
+DATA_CHEF(INT16, explicit, ExplicitDataChefFactory<int16_t>)
DATA_CHEF(UINT8, explicit, ExplicitDataChefFactory<uint8_t>)
DATA_CHEF(BOOL, explicit, ExplicitDataChefFactory<bool>)
DATA_CHEF(FLOAT32, explicit, ExplicitDataChefFactory<float>)
DATA_CHEF(FLOAT32, gaussian, GaussianFloat32DataChefFactory)
DATA_CHEF(INT32, gaussian, GaussianInt32DataChefFactory)
+DATA_CHEF(INT16, gaussian, GaussianInt16DataChefFactory)
DATA_CHEF(UINT8, gaussian, GaussianUint8DataChefFactory)
namespace souschef
{
-std::vector<uint8_t> GaussianFloat32DataChef::generate(int32_t count) const
+template <typename T>
+static std::vector<uint8_t> generate_gaussian(int32_t count, float mean, float stddev,
+ std::minstd_rand::result_type seed)
{
- // TODO Support seed value override
- auto seed = std::chrono::system_clock::now().time_since_epoch().count();
-
std::minstd_rand rand{static_cast<std::minstd_rand::result_type>(seed)};
- std::normal_distribution<float> dist{_mean, _stddev};
+ std::normal_distribution<float> dist{mean, stddev};
std::vector<uint8_t> res;
+  constexpr float max_cap = std::numeric_limits<T>::max();
+  // lowest(), not min(): for floating-point T, min() is the smallest positive value
+  constexpr float min_cap = std::numeric_limits<T>::lowest();
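+  // Clamp each sample into T's range before casting so out-of-range draws saturate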
for (uint32_t n = 0; n < count; ++n)
{
- auto const value = dist(rand);
+ float raw_value = dist(rand);
+ const float capped_value = std::max(min_cap, std::min(max_cap, raw_value));
+ auto const value = static_cast<T>(capped_value);
auto const arr = reinterpret_cast<const uint8_t *>(&value);
- for (uint32_t b = 0; b < sizeof(float); ++b)
+ for (uint32_t b = 0; b < sizeof(T); ++b)
{
res.emplace_back(arr[b]);
}
return res;
}
-std::vector<uint8_t> GaussianInt32DataChef::generate(int32_t count) const
+template <typename T>
+static std::vector<uint8_t> generate_gaussian(int32_t count, float mean, float stddev)
{
- // TODO Support seed value override
- auto seed = std::chrono::system_clock::now().time_since_epoch().count();
+ auto time_stamp = std::chrono::system_clock::now().time_since_epoch().count();
- std::minstd_rand rand{static_cast<std::minstd_rand::result_type>(seed)};
- std::normal_distribution<float> dist{_mean, _stddev};
+ // Note this is implementation defined, change if needed.
+ auto seed = static_cast<std::minstd_rand::result_type>(time_stamp);
- std::vector<uint8_t> res;
+ return generate_gaussian<T>(count, mean, stddev, seed);
+}
- for (uint32_t n = 0; n < count; ++n)
- {
- auto const value = static_cast<int32_t>(dist(rand));
- auto const arr = reinterpret_cast<const uint8_t *>(&value);
+std::vector<uint8_t> GaussianFloat32DataChef::generate(int32_t count) const
+{
+ return generate_gaussian<float>(count, _mean, _stddev);
+}
- for (uint32_t b = 0; b < sizeof(int32_t); ++b)
- {
- res.emplace_back(arr[b]);
- }
- }
+std::vector<uint8_t> GaussianInt32DataChef::generate(int32_t count) const
+{
+ return generate_gaussian<int32_t>(count, _mean, _stddev);
+}
- return res;
+std::vector<uint8_t> GaussianInt16DataChef::generate(int32_t count) const
+{
+ return generate_gaussian<int16_t>(count, _mean, _stddev);
}
std::vector<uint8_t> GaussianUint8DataChef::generate(int32_t count) const
{
- // TODO Support seed value override
- auto seed = std::chrono::system_clock::now().time_since_epoch().count();
-
- std::minstd_rand rand{static_cast<std::minstd_rand::result_type>(seed)};
- std::normal_distribution<float> dist{_mean, _stddev};
-
- std::vector<uint8_t> res;
-
- for (uint32_t n = 0; n < count; ++n)
- {
- auto const value = static_cast<uint8_t>(dist(rand)); // uint8_t for data type
- auto const arr = reinterpret_cast<const uint8_t *>(&value); // uint8_t for byte streaming
-
- for (uint32_t b = 0; b < sizeof(uint8_t); ++b)
- {
- res.emplace_back(arr[b]);
- }
- }
-
- return res;
+ return generate_gaussian<uint8_t>(count, _mean, _stddev);
}
std::unique_ptr<DataChef> GaussianFloat32DataChefFactory::create(const Arguments &args) const
return std::unique_ptr<DataChef>{new GaussianInt32DataChef{mean, stddev}};
}
+std::unique_ptr<DataChef> GaussianInt16DataChefFactory::create(const Arguments &args) const
+{
+ if (args.count() != 2)
+ {
+ throw std::runtime_error{"invalid argument count: two arguments (mean/stddev) are expected"};
+ }
+
+ auto const mean = to_number<float>(args.value(0));
+ auto const stddev = to_number<float>(args.value(1));
+
+ return std::unique_ptr<DataChef>{new GaussianInt16DataChef{mean, stddev}};
+}
+
std::unique_ptr<DataChef> GaussianUint8DataChefFactory::create(const Arguments &args) const
{
if (args.count() != 2)
#include <cassert>
#include <limits>
+#include <stdexcept>
namespace souschef
{
template <> float to_number(const std::string &s) { return std::stof(s); }
template <> int to_number(const std::string &s) { return std::stoi(s); }
+template <> int16_t to_number(const std::string &s)
+{
+  // There is no standard function to parse int16_t or short int
+  // This function mimics the behavior of stoi, stol and stoll
+ int res = std::stol(s);
+  // The standard does not specify the error message string, so this one is arbitrary
+ if (res < std::numeric_limits<int16_t>::min() || res > std::numeric_limits<int16_t>::max())
+ {
+ throw std::out_of_range("to_number<int16_t>");
+ }
+ return res;
+}
template <> int64_t to_number(const std::string &s) { return std::stoll(s); }
template <> uint8_t to_number(const std::string &s)
{
+++ /dev/null
-file(GLOB_RECURSE TESTS "src/*.test.cpp")
-
-add_library(stdex INTERFACE)
-target_include_directories(stdex INTERFACE include)
-
-if(NOT ENABLE_TEST)
- return()
-endif(NOT ENABLE_TEST)
-
-# Google Test is mandatory for test
-nnas_find_package(GTest REQUIRED)
-
-add_executable(stdex_test ${TESTS})
-target_link_libraries(stdex_test stdex)
-target_link_libraries(stdex_test gtest_main)
-add_test(stdex_test stdex_test)
+++ /dev/null
-# stdex
-
-`stdex` is an extension over standard C++ libraries.
-
-# How to use
-
-Please read each header files.
-
-One example of `stdex::make_unique(..)` in `compiler/stdex/Memory.h` is as follows:
-
-```cpp
-#include <stdex/Memory.h>
-
-using stdex::make_unique;
-
-class A { ... };
-
-...
-
-std::unique_ptr<A> a = make_unique<A>(); // Note: std::make_unique is not supported in C++ 11
-
-```
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __STDEX_MEMORY_H__
-#define __STDEX_MEMORY_H__
-
-#include <memory>
-
-namespace stdex
-{
-
-using std::make_unique;
-
-} // namespace stdex
-
-#endif // __STDEX_MEMORY_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __STDEX_QUEUE_H__
-#define __STDEX_QUEUE_H__
-
-#include <queue>
-
-namespace stdex
-{
-
-/**
- * @brief Take the front (= first) element from the queue
- * @note The queue SHOULD have at least one element
- */
-template <typename T> T take(std::queue<T> &q)
-{
- auto res = q.front();
- q.pop();
- return res;
-}
-
-} // namespace stdex
-
-#endif // __STDEX_QUEUE_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __STDEX_SET_H__
-#define __STDEX_SET_H__
-
-#include <set>
-
-template <typename T> bool operator==(const std::set<T> &lhs, const std::set<T> &rhs)
-{
- if (rhs.size() != lhs.size())
- {
- return false;
- }
-
- for (const auto &element : lhs)
- {
- if (rhs.find(element) == rhs.end())
- {
- return false;
- }
- }
-
- return true;
-}
-
-template <typename T> std::set<T> operator-(const std::set<T> &lhs, const std::set<T> &rhs)
-{
- std::set<T> res;
-
- for (const auto &element : lhs)
- {
- if (rhs.find(element) == rhs.end())
- {
- res.insert(element);
- }
- }
-
- return res;
-}
-
-#endif // __STDEX_SET_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "stdex/Memory.h"
-
-#include <gtest/gtest.h>
-
-namespace
-{
-
-struct Stat
-{
- unsigned allocated = 0;
- unsigned freed = 0;
-};
-
-struct Counter
-{
-public:
- Counter(Stat *stat) : _stat{stat} { _stat->allocated += 1; }
-
-public:
- ~Counter() { _stat->freed += 1; }
-
-private:
- Stat *_stat;
-};
-
-} // namespace
-
-TEST(MemoryTest, make_unique)
-{
- Stat stat;
-
- ASSERT_EQ(stat.allocated, 0);
- ASSERT_EQ(stat.freed, 0);
-
- auto o = stdex::make_unique<::Counter>(&stat);
-
- ASSERT_EQ(stat.allocated, 1);
- ASSERT_EQ(stat.freed, 0);
-
- o.reset();
-
- ASSERT_EQ(stat.allocated, 1);
- ASSERT_EQ(stat.freed, 1);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "stdex/Queue.h"
-
-#include <gtest/gtest.h>
-
-TEST(QueueTest, take)
-{
- std::queue<int> q;
-
- q.emplace(3);
- q.emplace(4);
- q.emplace(5);
-
- ASSERT_EQ(stdex::take(q), 3);
- ASSERT_EQ(stdex::take(q), 4);
- ASSERT_EQ(stdex::take(q), 5);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "stdex/Set.h"
-
-#include <gtest/gtest.h>
-
-TEST(SET, operator_eq)
-{
- ASSERT_TRUE(std::set<int>({1, 2, 3}) == std::set<int>({1, 2, 3}));
- ASSERT_FALSE(std::set<int>({1, 3}) == std::set<int>({1, 2, 3}));
-}
-
-TEST(SET, operator_diff)
-{
- const std::set<int> lhs{1, 2, 3};
- const std::set<int> rhs{2, 4};
-
- auto res = lhs - rhs;
-
- ASSERT_EQ(res.size(), 2);
- ASSERT_NE(res.find(1), res.end());
- ASSERT_NE(res.find(3), res.end());
-}
target_link_libraries(tf2circle PRIVATE exo)
target_link_libraries(tf2circle PRIVATE locop)
target_link_libraries(tf2circle PRIVATE hermes_std)
-target_link_libraries(tf2circle PRIVATE stdex)
target_link_libraries(tf2circle PRIVATE angkor cwrap)
target_link_libraries(tf2circle PRIVATE tf2circle_customop_info_proto)
-require("stdex")
require("hermes-std")
require("moco-tf")
require("exo")
#include <hermes/ConsoleReporter.h>
#include <hermes/EnvConfig.h>
-#include <stdex/Memory.h>
-
#include <cassert>
-
+#include <memory>
#include <iostream>
#include <stdexcept>
#include <string>
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<EnvConfig>("TF2CIRCLE_Log"));
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<EnvConfig>("TF2CIRCLE_Log"));
}
return ctx;
using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
// This line allows users to control all the moco-tf loggers via TF2CIRCLE_Log_Frontend
- moco::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2CIRCLE_Log_Frontend"));
+ moco::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2CIRCLE_Log_Frontend"));
// This line allows users to control all the exo-circle loggers via TF2CIRCLE_Log_Backend
- exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2CIRCLE_Log_Backend"));
+ exo::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2CIRCLE_Log_Backend"));
LOGGER(l);
target_link_libraries(tf2nnpkg PRIVATE exo)
target_link_libraries(tf2nnpkg PRIVATE locop)
target_link_libraries(tf2nnpkg PRIVATE hermes_std)
-target_link_libraries(tf2nnpkg PRIVATE stdex)
target_link_libraries(tf2nnpkg PRIVATE angkor cwrap)
install(TARGETS tf2nnpkg DESTINATION bin)
-require("stdex")
require("hermes-std")
require("moco-tf")
require("exo")
#include <hermes/ConsoleReporter.h>
#include <hermes/EnvConfig.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <iostream>
#include <fstream>
#include <functional>
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<EnvConfig>("TF2NNPKG_Log"));
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<EnvConfig>("TF2NNPKG_Log"));
}
return ctx;
using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
// This line allows users to control all the moco-tf loggers via TF2NNPKG_Log_Frontend
- moco::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2NNPKG_Log_Frontend"));
+ moco::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2NNPKG_Log_Frontend"));
// This line allows users to control all the exo-circle loggers via TF2NNPKG_Log_Backend
- exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2NNPKG_Log_Backend"));
+ exo::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2NNPKG_Log_Backend"));
LOGGER(l);
target_link_libraries(tf2tflite PRIVATE exo)
target_link_libraries(tf2tflite PRIVATE locop)
target_link_libraries(tf2tflite PRIVATE hermes_std)
-target_link_libraries(tf2tflite PRIVATE stdex)
target_link_libraries(tf2tflite PRIVATE angkor cwrap)
target_link_libraries(tf2tflite PRIVATE tf2tflite_customop_info_proto)
install(TARGETS tf2tflite DESTINATION bin)
-require("stdex")
require("hermes-std")
require("moco-tf")
require("exo")
#include <hermes/ConsoleReporter.h>
#include <hermes/EnvConfig.h>
-#include <stdex/Memory.h>
-
#include <cassert>
-
+#include <memory>
#include <iostream>
#include <stdexcept>
#include <string>
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<EnvConfig>("TF2TFLITE_Log"));
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<EnvConfig>("TF2TFLITE_Log"));
}
return ctx;
using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
// This line allows users to control all the moco-tf loggers via TF2TFLITE_Log_Frontend
- moco::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2TFLITE_Log_Frontend"));
+ moco::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2TFLITE_Log_Frontend"));
// This line allows users to control all the exo-tflite loggers via TF2TFLITE_Log_Backend
- exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2TFLITE_Log_Backend"));
+ exo::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2TFLITE_Log_Backend"));
LOGGER(l);
raise ValueError("--input_arrays must be provided")
if not flags.output_arrays:
raise ValueError("--output_arrays must be provided")
+ input_shapes = []
+ if flags.input_shapes:
+ input_shapes = [
+ _parse_array(shape, type_fn=int)
+ for shape in flags.input_shapes.split(":")
+ ]
+ if len(input_shapes) != len(_parse_array(flags.input_arrays)):
+ raise ValueError(
+ "--input_shapes and --input_arrays must have the same length")
file_content = open(flags.input_path, 'rb').read()
try:
graph_def = tf.compat.v1.GraphDef()
_str + ":0" if len(_str.split(":")) == 1 else _str
for _str in _parse_array(flags.output_arrays)
])
+ for i in range(len(input_shapes)):
+ wrap_func.inputs[i].set_shape(input_shapes[i])
converter = tf.lite.TFLiteConverter.from_concrete_functions([wrap_func])
if flags.model_format == "saved_model":
target_include_directories(tfinfo_v2 PUBLIC include)
target_link_libraries(tfinfo_v2 PRIVATE tfinfo_v2_proto)
target_link_libraries(tfinfo_v2 PRIVATE oops)
-target_link_libraries(tfinfo_v2 PRIVATE stdex)
if(NOT ENABLE_TEST)
return()
}
TensorSignature(const Kind kind, const std::string &name, const ShapeHint &shape_hint)
- : TensorSignature(kind, name)
+ : TensorSignature(kind, name)
{
_shape_hint = shape_hint;
}
require("oops")
-require("stdex")
name : "relu:0"
}
),
- // clang-format on
+ // clang-format on
};
} // namespace
input, a:0, TF_FLOAT, [2, 3 ,4]
output, b:0, TF_FLOAT, [2, 3 ,4]
)",
- // clang-format on
+ // clang-format on
};
} // namespace
#include "tfinfo-v2/TensorSignature.h"
#include <oops/UserExn.h>
-#include <stdex/Memory.h>
#include <tfinfo-v2.pb.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/text_format.h>
+#include <memory>
#include <fstream>
#include <fcntl.h>
auto name = input_def.name();
validate_tensor_name(name, path);
- auto tensor = stdex::make_unique<tfinfo::v2::TensorSignature>(
- tfinfo::v2::TensorSignature::Kind::Input, name);
+ auto tensor = std::make_unique<tfinfo::v2::TensorSignature>(
+ tfinfo::v2::TensorSignature::Kind::Input, name);
// when there is dim attribute for unknown shape
if (input_def.dim_size() > 0)
auto name = info_def.output().Get(i).name();
validate_tensor_name(name, path);
- auto tensor = stdex::make_unique<tfinfo::v2::TensorSignature>(
- tfinfo::v2::TensorSignature::Kind::Output, name);
+ auto tensor = std::make_unique<tfinfo::v2::TensorSignature>(
+ tfinfo::v2::TensorSignature::Kind::Output, name);
tensors.emplace_back(std::move(tensor));
}
}
add_library(tfinfo STATIC ${SOURCES})
set_target_properties(tfinfo PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(tfinfo PUBLIC include)
-target_link_libraries(tfinfo stdex angkor oops)
+target_link_libraries(tfinfo angkor oops)
# TODO Remove "nnkit_support_tftestinfo" later
add_library(nnkit_support_tftestinfo ALIAS tfinfo)
ParsedTensor(const Kind kind, const std::string &name, const DataType &dtype,
const std::vector<int32_t> &shape)
- : _kind(kind), _dtype(dtype)
+ : _kind(kind), _dtype(dtype)
{
_tensor_name.assign(name);
_shape.dim(rank) = shape.at(rank);
}
- ~ParsedTensor() { /* empty */}
+ ~ParsedTensor()
+ { /* empty */
+ }
public:
Kind kind() const { return _kind; }
-require("stdex")
require("angkor")
require("oops")
#include "Compat.h"
#include <oops/UserExn.h>
-#include <stdex/Memory.h>
#include <nncc/core/ADT/tensor/Shape.h>
#include <cctype>
shape.emplace_back(std::stoi(dim));
}
- return stdex::make_unique<ParsedTensor>(kind, name, dtype, shape);
+ return std::make_unique<ParsedTensor>(kind, name, dtype, shape);
}
#undef CHECK_NOT_NULL
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(tfkit ${SOURCES})
-target_link_libraries(tfkit PRIVATE stdex)
target_link_libraries(tfkit PRIVATE cli)
target_link_libraries(tfkit PRIVATE mio_tf)
target_link_libraries(tfkit PRIVATE nncc_common)
#include "ConvertCommand.hpp"
#include "Support.hpp"
-#include <stdex/Memory.h>
-
#include <tensorflow/core/framework/graph.pb.h>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/text_format.h>
#include <google/protobuf/util/json_util.h>
+#include <memory>
#include <cassert>
#include <map>
#include <string>
std::map<std::string, std::unique_ptr<Importer>> importers;
- importers["pb"] = stdex::make_unique<ImporterImpl<DataFormat::PBBIN>>();
- importers["pbtxt"] = stdex::make_unique<ImporterImpl<DataFormat::PBTXT>>();
+ importers["pb"] = std::make_unique<ImporterImpl<DataFormat::PBBIN>>();
+ importers["pbtxt"] = std::make_unique<ImporterImpl<DataFormat::PBTXT>>();
std::map<std::string, std::unique_ptr<Exporter>> exporters;
- exporters["json"] = stdex::make_unique<ExporterImpl<DataFormat::JSON>>();
+ exporters["json"] = std::make_unique<ExporterImpl<DataFormat::JSON>>();
auto importer = importers.at(input_format).get();
auto exporter = exporters.at(output_format).get();
#include "ConvertCommand.hpp"
#include <cli/App.h>
-#include <stdex/Memory.h>
+
+#include <memory>
int main(int argc, char **argv)
{
cli::App app{argv[0]};
- app.insert("encode", stdex::make_unique<tfkit::EncodeCommand>());
- app.insert("decode", stdex::make_unique<tfkit::DecodeCommand>());
- app.insert("unpack", stdex::make_unique<tfkit::UnpackCommand>());
- app.insert("pack", stdex::make_unique<tfkit::PackCommand>());
- app.insert("convert", stdex::make_unique<tfkit::ConvertCommand>());
+ app.insert("encode", std::make_unique<tfkit::EncodeCommand>());
+ app.insert("decode", std::make_unique<tfkit::DecodeCommand>());
+ app.insert("unpack", std::make_unique<tfkit::UnpackCommand>());
+ app.insert("pack", std::make_unique<tfkit::PackCommand>());
+ app.insert("convert", std::make_unique<tfkit::ConvertCommand>());
return app.run(argc - 1, argv + 1);
}
}
input_tensor->set_tensor_content(std::string(
- reinterpret_cast<const char *>(tensor_content.data()), sizeof(float) * input_flat_size));
+ reinterpret_cast<const char *>(tensor_content.data()), sizeof(float) * input_flat_size));
input_tensor->clear_float_val();
}
}
input_tensor->set_tensor_content(std::string(
- reinterpret_cast<const char *>(tensor_content.data()), sizeof(int32_t) * input_flat_size));
+ reinterpret_cast<const char *>(tensor_content.data()), sizeof(int32_t) * input_flat_size));
input_tensor->clear_int_val();
}
#include "Support.hpp"
-#include <stdex/Memory.h>
-
#include <tensorflow/core/framework/graph.pb.h>
+#include <memory>
#include <cassert>
#include <fstream>
#include <stdexcept>
return nullptr;
}
- auto stream = stdex::make_unique<T>(path.c_str(), mode);
+ auto stream = std::make_unique<T>(path.c_str(), mode);
if (!stream->is_open())
{
throw std::runtime_error{"ERROR: Failed to open " + path};
std::unique_ptr<IOConfiguration> make_ioconfig(const CmdArguments &cmdargs)
{
- auto iocfg = stdex::make_unique<IOConfiguration>();
+ auto iocfg = std::make_unique<IOConfiguration>();
auto in = open_fstream<std::ifstream>(cmdargs.get_or(0, "-"), std::ios::in | std::ios::binary);
iocfg->in(std::move(in));
public:
CmdArguments() = delete;
CmdArguments(int argc, const char *const *argv)
- : _argc(static_cast<unsigned int>(argc)), _argv{argv}
+ : _argc(static_cast<unsigned int>(argc)), _argv{argv}
{
}
input_tensor->clear_float_val();
const float *tensor_content =
- reinterpret_cast<const float *>(input_tensor->tensor_content().data());
+ reinterpret_cast<const float *>(input_tensor->tensor_content().data());
for (int i = 0; i < input_flat_size; i++)
{
input_tensor->add_float_val(tensor_content[i]);
input_tensor->clear_int_val();
const int32_t *tensor_content =
- reinterpret_cast<const int32_t *>(input_tensor->tensor_content().data());
+ reinterpret_cast<const int32_t *>(input_tensor->tensor_content().data());
for (int i = 0; i < input_flat_size; i++)
{
input_tensor->add_int_val(tensor_content[i]);
input_tensor->clear_int_val();
const int8_t *tensor_content =
- reinterpret_cast<const int8_t *>(input_tensor->tensor_content().data());
+ reinterpret_cast<const int8_t *>(input_tensor->tensor_content().data());
for (int i = 0; i < input_flat_size; i++)
{
input_tensor->add_int_val(tensor_content[i]);
input_tensor->clear_bool_val();
const bool *tensor_content =
- reinterpret_cast<const bool *>(input_tensor->tensor_content().data());
+ reinterpret_cast<const bool *>(input_tensor->tensor_content().data());
for (int i = 0; i < input_flat_size; i++)
{
input_tensor->add_bool_val(tensor_content[i]);
"Lite model files"};
arser.add_argument("--operators").nargs(0).help("Dump operators in tflite file");
arser.add_argument("--conv2d_weight")
- .nargs(0)
- .help("Dump Conv2D series weight operators in tflite file");
+ .nargs(0)
+ .help("Dump Conv2D series weight operators in tflite file");
arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in tflite file");
arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file to inspect");
return tflite::TensorType_INT64;
case tflchef::BOOL:
return tflite::TensorType_BOOL;
+ case tflchef::INT16:
+ return tflite::TensorType_INT16;
default:
break;
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BroadcastTo.h"
+
+#include "flatbuffers/flexbuffers.h"
+
+flatbuffers::Offset<void> BroadcastToChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ return flatbuffers::Offset<void>();
+}
+
+flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+BroadcastToChef::custom_value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.type() == "BroadcastTo");
+
+ /**
+ * REGISTER_OP("BroadcastTo")
+ .Input("input: T")
+ .Input("shape: Tidx")
+ .Output("output: T")
+ .Attr("T: type")
+ .Attr("Tidx: {int32, int64} = DT_INT32")
+ .SetShapeFn([](InferenceContext* c)
+ */
+
+ auto flex_buffers = std::make_unique<flexbuffers::Builder>();
+ size_t map_start = flex_buffers->StartMap();
+
+ // TODO Support more data types
+ flex_buffers->Int("T", tflite::TensorType_FLOAT32);
+ flex_buffers->Int("Tidx", tflite::TensorType_INT32);
+
+ flex_buffers->EndMap(map_start);
+ flex_buffers->Finish();
+
+ auto circle_custom_options = fbb.CreateVector(flex_buffers->GetBuffer());
+ return circle_custom_options;
+}
+
+std::unique_ptr<OpChef> BroadcastToChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new BroadcastToChef{operation}};
+}
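
The custom options produced above form a flexbuffer map with the keys "T" and "Tidx". As a reference for readers, a minimal sketch of reading such a buffer back with the flexbuffers API follows; the helper name and the `custom_options` buffer are hypothetical and not part of this patch:

#include <cstdint>
#include <vector>

#include "flatbuffers/flexbuffers.h"

// Hypothetical helper: decode the {T, Tidx} map written by BroadcastToChef::custom_value
// from the raw custom_options buffer attached to a CUSTOM operator.
inline void read_broadcast_to_custom_options(const std::vector<uint8_t> &custom_options)
{
  auto map = flexbuffers::GetRoot(custom_options.data(), custom_options.size()).AsMap();
  auto t = map["T"].AsInt32();       // expected: tflite::TensorType_FLOAT32
  auto tidx = map["Tidx"].AsInt32(); // expected: tflite::TensorType_INT32
  (void)t;
  (void)tidx;
}
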
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_BROADCASTTO_H__
+#define __OP_BROADCASTTO_H__
+
+#include "OpChef.h"
+
+class BroadcastToChef final : public OpChef
+{
+public:
+ explicit BroadcastToChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_CUSTOM; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_NONE; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+ custom_value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct BroadcastToChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_BROADCASTTO_H__
{
public:
GeneratedModelImpl(std::unique_ptr<flatbuffers::FlatBufferBuilder> &&builder)
- : _builder{std::move(builder)}
+ : _builder{std::move(builder)}
{
// DO NOTHING
}
static DataChefRegistry fp32;
static DataChefRegistry u8;
static DataChefRegistry boolean;
+ static DataChefRegistry s16;
switch (type)
{
return u8;
case tflchef::BOOL:
return boolean;
+ case tflchef::INT16:
+ return s16;
default:
break;
}
std::vector<flatbuffers::Offset<::tflite::SubGraph>> &subgraph_vec;
std::unique_ptr<flatbuffers::FlatBufferBuilder> &flatbuffer_builder;
std::map<tflite::BuiltinOperator, int32_t> &builtin_code_map;
+ std::vector<std::string> &custom_code_vec;
std::string noname;
};
std::vector<flatbuffers::Offset<::tflite::SubGraph>> &subgraph_vec = cp.subgraph_vec;
std::unique_ptr<flatbuffers::FlatBufferBuilder> &flatbuffer_builder = cp.flatbuffer_builder;
std::map<tflite::BuiltinOperator, int32_t> &builtin_code_map = cp.builtin_code_map;
+ std::vector<std::string> &custom_code_vec = cp.custom_code_vec;
// Operand-related
std::vector<flatbuffers::Offset<::tflite::Tensor>> tensor_vec;
{
// Create array segments
auto tflite_array_segments =
- as_tflite_sparse_index_vec(*flatbuffer_builder, dm.array_segments());
+ as_tflite_sparse_index_vec(*flatbuffer_builder, dm.array_segments());
// Create array indices
auto tflite_array_indices =
- as_tflite_sparse_index_vec(*flatbuffer_builder, dm.array_indices());
+ as_tflite_sparse_index_vec(*flatbuffer_builder, dm.array_indices());
auto tflite_dim_metadata_builder = tflite::DimensionMetadataBuilder{*flatbuffer_builder};
tflite_dim_metadata_builder.add_format(as_tflite_dimensiontype(dm.format()));
tflite_dim_metadata_builder.add_dense_size(dm.dense_size());
tflite_dim_metadata_builder.add_array_segments(tflite_array_segments);
tflite_dim_metadata_builder.add_array_segments_type(
- as_tflite_sparse_idx_vec_type(dm.array_segments().type()));
+ as_tflite_sparse_idx_vec_type(dm.array_segments().type()));
tflite_dim_metadata_builder.add_array_indices(tflite_array_indices);
tflite_dim_metadata_builder.add_array_indices_type(
- as_tflite_sparse_idx_vec_type(dm.array_indices().type()));
+ as_tflite_sparse_idx_vec_type(dm.array_indices().type()));
auto tflite_dim_metadata = tflite_dim_metadata_builder.Finish();
dim_metadata_vec.emplace_back(tflite_dim_metadata);
}
// Create Operator
tflite::OperatorBuilder op_builder{*flatbuffer_builder};
- // Get operator code index from builtin_code_set with assumption, order of
- // builtin_code_set is same as that of code_vec
+ // Note that opcode_index is an index into the operator_codes vector.
+ // operator_codes consists of builtin codes followed by custom codes, appended in that order.
+ uint32_t opcode_index = 0;
auto op_it = builtin_code_map.find(op_chef->code());
- assert(op_it != builtin_code_map.end());
- uint32_t opcode_index = std::distance(builtin_code_map.begin(), op_it);
+ // builtin operator
+ if (op_it != builtin_code_map.end())
+ {
+ opcode_index = std::distance(builtin_code_map.begin(), op_it);
+ }
+ // custom operator
+ else
+ {
+ auto op_it = std::find(custom_code_vec.begin(), custom_code_vec.end(), operation.type());
+ assert(op_it != custom_code_vec.end());
+ opcode_index = builtin_code_map.size();
+ opcode_index += std::distance(custom_code_vec.begin(), op_it);
+ }
op_builder.add_opcode_index(opcode_index);
op_builder.add_inputs(inputs);
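
To make the index bookkeeping above concrete: operator_codes is laid out as all builtin codes first (in builtin_code_map iteration order) and then all custom codes (in custom_code_vec order). A stand-alone sketch of the same lookup, with hypothetical names and simplified map types, is:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>
#include <string>
#include <vector>

// Sketch only: resolves an opcode_index the same way as the generator above, assuming
// operator_codes was emitted as <all builtin codes><all custom codes>.
inline uint32_t resolve_opcode_index(const std::map<int32_t, int32_t> &builtin_code_map,
                                     const std::vector<std::string> &custom_code_vec,
                                     int32_t builtin_code, const std::string &custom_type)
{
  auto b_it = builtin_code_map.find(builtin_code);
  if (b_it != builtin_code_map.end()) // builtin operator
    return std::distance(builtin_code_map.begin(), b_it);

  auto c_it = std::find(custom_code_vec.begin(), custom_code_vec.end(), custom_type); // custom
  assert(c_it != custom_code_vec.end());
  return builtin_code_map.size() + std::distance(custom_code_vec.begin(), c_it);
}

For example, with three builtin codes and custom_code_vec = {"BroadcastTo", "MatMul"}, a custom "MatMul" operation would get opcode_index 3 + 1 = 4.
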
// Initialize Data Chef Registry
#define DATA_CHEF(TYPE, NAME, FACTORY_CLASS) \
data_chef_registry(::tflchef::TYPE) \
- .add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
+ .add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
#include <souschef/DataChef.def>
#undef DATA_CHEF
// Create FlatBufferBuilder
//
auto flatbuffer_builder =
- std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
+ std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
// Operand-related
std::vector<flatbuffers::Offset<::tflite::Buffer>> buffer_vec;
// Create OperatorCode with Custom Operator
std::set<std::string> custom_code_set = gather_customcode_set(model_recipe);
- if (custom_code_set.size() &&
- builtin_code_map.find(tflite::BuiltinOperator_CUSTOM) == builtin_code_map.end())
- builtin_code_map[tflite::BuiltinOperator_CUSTOM] = 1;
+ std::vector<std::string> custom_code_vec{custom_code_set.begin(), custom_code_set.end()};
- for (auto opcode : custom_code_set)
+ for (auto opcode : custom_code_vec)
{
auto custom_code = flatbuffer_builder->CreateString(opcode);
tflite::OperatorCodeBuilder code_builder{*flatbuffer_builder};
//
// Create Main graph
//
- CookParams cp{buffer_vec, code_vec, subgraph_vec, flatbuffer_builder, builtin_code_map, "main"};
+ CookParams cp{buffer_vec, code_vec, subgraph_vec, flatbuffer_builder,
+ builtin_code_map, custom_code_vec, "main"};
cook_graph<::tflchef::ModelRecipe>(model_recipe, cp);
std::ostringstream stringStream;
stringStream << "sub_" << (g + 1);
- CookParams cp{buffer_vec, code_vec, subgraph_vec,
- flatbuffer_builder, builtin_code_map, stringStream.str()};
+ CookParams cp{buffer_vec, code_vec, subgraph_vec, flatbuffer_builder,
+ builtin_code_map, custom_code_vec, stringStream.str()};
cook_graph<::tflchef::Graph>(graph, cp);
}
// Return "GenerateModel"
return GeneratedModel{
- std::unique_ptr<GeneratedModelImpl>(new GeneratedModelImpl(std::move(flatbuffer_builder)))};
+ std::unique_ptr<GeneratedModelImpl>(new GeneratedModelImpl(std::move(flatbuffer_builder)))};
}
} // namespace tflchef
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BidirectionalSequenceLSTM.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void>
+BidirectionalSequenceLSTMChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_bidirectional_sequence_lstm_options());
+
+ tflite::BidirectionalSequenceLSTMOptionsBuilder options_builder(fbb);
+ options_builder.add_fused_activation_function(
+ as_tflite_activation(operation.bidirectional_sequence_lstm_options().activation()));
+ options_builder.add_cell_clip(operation.bidirectional_sequence_lstm_options().cell_clip());
+ options_builder.add_proj_clip(operation.bidirectional_sequence_lstm_options().proj_clip());
+ options_builder.add_time_major(operation.bidirectional_sequence_lstm_options().time_major());
+ options_builder.add_asymmetric_quantize_inputs(
+ operation.bidirectional_sequence_lstm_options().asymmetric_quantize_inputs());
+ options_builder.add_merge_outputs(
+ operation.bidirectional_sequence_lstm_options().merge_outputs());
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef>
+BidirectionalSequenceLSTMChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new BidirectionalSequenceLSTMChef{operation}};
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_BIDIRECTIONALSEQUENCE_LSTM_H__
+#define __OP_BIDIRECTIONALSEQUENCE_LSTM_H__
+
+#include "OpChef.h"
+
+class BidirectionalSequenceLSTMChef final : public OpChef
+{
+public:
+ explicit BidirectionalSequenceLSTMChef(const tflchef::Operation *operation)
+ : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override
+ {
+ return tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM;
+ }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_BidirectionalSequenceLSTMOptions;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct BidirectionalSequenceLSTMChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_BIDIRECTIONALSEQUENCE_LSTM_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FakeQuant.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> FakeQuantChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+ assert(operation.has_fakequant_options());
+
+ auto options = operation.fakequant_options();
+
+ tflite::FakeQuantOptionsBuilder fq_options_builder{fbb};
+ fq_options_builder.add_min(options.min());
+ fq_options_builder.add_max(options.max());
+ fq_options_builder.add_num_bits(options.num_bits());
+ fq_options_builder.add_narrow_range(options.narrow_range());
+
+ return fq_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> FakeQuantChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new FakeQuantChef{operation}};
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_FAKE_QUANT_H__
+#define __OP_FAKE_QUANT_H__
+
+#include "OpChef.h"
+
+class FakeQuantChef final : public OpChef
+{
+public:
+ explicit FakeQuantChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_FAKE_QUANT; }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_FakeQuantOptions;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct FakeQuantChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_FAKE_QUANT_H__
{
public:
explicit LocalResponseNormalizationChef(const tflchef::Operation *operation)
- : _operation{operation}
+ : _operation{operation}
{
// DO NOTHING
}
// Note: 'CreateVector' should be placed before 'CreateOptions'
// Read flatbuffers.h 'void NotNested()' for more information
auto fb_squeeze_dims =
- fbb.CreateVector(options.squeeze_dim().data(), options.squeeze_dim().size());
+ fbb.CreateVector(options.squeeze_dim().data(), options.squeeze_dim().size());
return tflite::CreateSqueezeOptions(fbb, fb_squeeze_dims).Union();
}
strided_slice_options_builder.add_begin_mask(operation.strided_slice_options().begin_mask());
strided_slice_options_builder.add_end_mask(operation.strided_slice_options().end_mask());
strided_slice_options_builder.add_ellipsis_mask(
- operation.strided_slice_options().ellipsis_mask());
+ operation.strided_slice_options().ellipsis_mask());
strided_slice_options_builder.add_new_axis_mask(
- operation.strided_slice_options().new_axis_mask());
+ operation.strided_slice_options().new_axis_mask());
strided_slice_options_builder.add_shrink_axis_mask(
- operation.strided_slice_options().shrink_axis_mask());
+ operation.strided_slice_options().shrink_axis_mask());
return strided_slice_options_builder.Finish().Union();
}
tflite::UnidirectionalSequenceLSTMOptionsBuilder options_builder(fbb);
options_builder.add_fused_activation_function(
- as_tflite_activation(operation.unidirectional_sequence_lstm_options().activation()));
+ as_tflite_activation(operation.unidirectional_sequence_lstm_options().activation()));
options_builder.add_cell_clip(operation.unidirectional_sequence_lstm_options().cell_clip());
options_builder.add_proj_clip(operation.unidirectional_sequence_lstm_options().proj_clip());
options_builder.add_time_major(operation.unidirectional_sequence_lstm_options().time_major());
options_builder.add_asymmetric_quantize_inputs(
- operation.unidirectional_sequence_lstm_options().asymmetric_quantize_inputs());
+ operation.unidirectional_sequence_lstm_options().asymmetric_quantize_inputs());
return options_builder.Finish().Union();
}
{
public:
explicit UnidirectionalSequenceLSTMChef(const tflchef::Operation *operation)
- : _operation{operation}
+ : _operation{operation}
{
// DO NOTHING
}
OP_CHEF(AveragePool2D, AveragePool2DChefFactory)
OP_CHEF(BatchMatMul, BatchMatMulChefFactory)
OP_CHEF(BatchToSpaceND, BatchToSpaceNDChefFactory)
+OP_CHEF(BidirectionalSequenceLSTM, BidirectionalSequenceLSTMChefFactory)
OP_CHEF(Cast, CastChefFactory)
OP_CHEF(Ceil, CeilChefFactory)
OP_CHEF(Concatenation, ConcatenationChefFactory)
OP_CHEF(Equal, EqualChefFactory)
OP_CHEF(Exp, ExpChefFactory)
OP_CHEF(ExpandDims, ExpandDimsChefFactory)
+OP_CHEF(FakeQuant, FakeQuantChefFactory)
OP_CHEF(Fill, FillChefFactory)
OP_CHEF(Floor, FloorChefFactory)
OP_CHEF(FloorDiv, FloorDivChefFactory)
OP_CHEF(AddV2, AddV2ChefFactory)
OP_CHEF(All, AllChefFactory)
OP_CHEF(BatchMatMulV2, BatchMatMulV2ChefFactory)
+OP_CHEF(BroadcastTo, BroadcastToChefFactory)
OP_CHEF(MatMul, MatMulChefFactory)
OP_CHEF(MatrixBandPart, MatrixBandPartChefFactory)
OP_CHEF(MaxPoolWithArgMax, MaxPoolWithArgMaxChefFactory)
#include "Op/AveragePool2D.h"
#include "Op/BatchMatMul.h"
#include "Op/BatchToSpaceND.h"
+#include "Op/BidirectionalSequenceLSTM.h"
#include "Op/Cast.h"
#include "Op/Ceil.h"
#include "Op/Concatenation.h"
#include "Op/Equal.h"
#include "Op/Exp.h"
#include "Op/ExpandDims.h"
+#include "Op/FakeQuant.h"
#include "Op/Fill.h"
#include "Op/Floor.h"
#include "Op/FloorDiv.h"
#include "CustomOp/AddV2.h"
#include "CustomOp/All.h"
#include "CustomOp/BatchMatMulV2.h"
+#include "CustomOp/BroadcastTo.h"
#include "CustomOp/MatMul.h"
#include "CustomOp/MatrixBandPart.h"
#include "CustomOp/MaxPoolWithArgMax.h"
UINT8 = 3;
INT64 = 4;
BOOL = 6;
+ INT16 = 7;
}
enum DimensionType {
SYMMETRIC = 1;
}
+message BidirectionalSequenceLSTMOptions {
+ optional Activation activation = 1 [default = NONE];
+ optional float cell_clip = 2 [default = 0.0];
+ optional float proj_clip = 3 [default = 0.0];
+ optional bool merge_outputs = 6 [default = false];
+ optional bool time_major = 4 [default = true];
+ optional bool asymmetric_quantize_inputs = 5 [default = false];
+}
+
message Conv2DOptions
{
optional Padding padding = 1 [default = VALID];
optional bool include_batch_in_index = 7 [default = false];
}
+message FakeQuantOptions {
+ optional float min = 1 [default = 0.0];
+ optional float max = 2 [default = 0.0];
+ optional int32 num_bits = 3 [default = 0];
+ optional bool narrow_range = 4 [default = false];
+}
+
message Operation {
optional string type = 1;
repeated string input = 2;
optional SparseToDenseOptions sparse_to_dense_options = 175;
optional PowOptions pow_options = 176;
optional ArgMinOptions argmin_options = 177;
- // FakeQuantOptions 178
- // BidirectionalSequenceLSTMOptions 179
+ optional FakeQuantOptions fakequant_options = 178;
+ optional BidirectionalSequenceLSTMOptions bidirectional_sequence_lstm_options = 179;
// BidirectionalSequenceRNNOptions 180
optional UnidirectionalSequenceLSTMOptions unidirectional_sequence_lstm_options = 181;
optional RangeOptions range_options = 182;
--- /dev/null
+operand {
+ name: "ifm"
+ type: INT16
+ shape { dim: 1 dim: 5 dim: 5 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: INT16
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "1.0"
+ arg: "6.0"
+ }
+}
+operand {
+ name: "bias"
+ type: INT16
+ shape { dim: 1 }
+ filler {
+ tag: "constant"
+ arg: "12345"
+ }
+}
+operand {
+ name: "ofm"
+ type: INT16
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+input: "ker"
+output: "ofm"
target_include_directories(tflchef_tflite PRIVATE src)
target_link_libraries(tflchef_tflite tflchef_proto)
target_link_libraries(tflchef_tflite mio_tflite)
-target_link_libraries(tflchef_tflite stdex)
target_link_libraries(tflchef_tflite cwrap)
target_link_libraries(tflchef_tflite souschef)
return tflchef::UINT8;
case tflite::TensorType_BOOL:
return tflchef::BOOL;
+ case tflite::TensorType_INT16:
+ return tflchef::INT16;
// TODO handle other types
// TensorType_FLOAT16
// TensorType_STRING
- // TensorType_INT16
// TensorType_COMPLEX64
default:
throw std::runtime_error{"unsupported tensor type"};
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BidirectionalSequenceLSTM.h"
+
+#include "Convert.h"
+#include "FillerHelper.h"
+
+namespace tflchef
+{
+
+void TFliteOpBidirectionalSequenceLSTM::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+ assert(inputs.size() == 48);
+
+ for (int32_t i = 0; i < inputs.size(); i++)
+ {
+ // Skip inputs 0, 35, 36, 37 and 38.
+ // These are the Input Tensor, the ActivationState Tensors (forward and backward), and the
+ // CellState Tensors (forward and backward).
+ // They may be updated from the previous step or from user-given data, so they cannot be Const.
+ if (i == 0 || i == 35 || i == 36 || i == 37 || i == 38)
+ continue;
+ if (inputs[i] != -1)
+ fill_tensor_to_import(inputs[i], import);
+ }
+}
+
+tflchef::Operation *
+TFliteOpBidirectionalSequenceLSTM::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_BidirectionalSequenceLSTMOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("BidirectionalSequenceLSTM");
+
+ auto op_options = operation->mutable_bidirectional_sequence_lstm_options();
+
+ op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+ op_options->set_cell_clip(op_params->cell_clip());
+ op_options->set_proj_clip(op_params->proj_clip());
+ op_options->set_time_major(op_params->time_major());
+ op_options->set_asymmetric_quantize_inputs(op_params->asymmetric_quantize_inputs());
+ op_options->set_merge_outputs(op_params->merge_outputs());
+
+ return operation;
+}
+
+} // namespace tflchef
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_BIDIRECTIONALSEQUENCE_LSTM_H__
+#define __TFLITE_OP_BIDIRECTIONALSEQUENCE_LSTM_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for BidirectionalSequenceLSTM
+ */
+class TFliteOpBidirectionalSequenceLSTM : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_BIDIRECTIONALSEQUENCE_LSTM_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FakeQuant.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpFakeQuant::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpFakeQuant::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_FakeQuantOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("FakeQuant");
+
+ auto op_options = operation->mutable_fakequant_options();
+
+ op_options->set_min(op_params->min());
+ op_options->set_max(op_params->max());
+ op_options->set_num_bits(op_params->num_bits());
+ op_options->set_narrow_range(op_params->narrow_range());
+
+ return operation;
+}
+
+} // namespace tflchef
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_FAKE_QUANT_H__
+#define __TFLITE_OP_FAKE_QUANT_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for FakeQuant
+ */
+class TFliteOpFakeQuant : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_FAKE_QUANT_H__
#include "Maximum.h"
+#include "Convert.h"
+#include "FillerHelper.h"
+
namespace tflchef
{
void TFliteOpMaximum::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
- // Nothing to do with filler
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+ assert(inputs.size() == 2);
+
+ fill_tensor_to_import(inputs[0], import);
+ fill_tensor_to_import(inputs[1], import);
}
tflchef::Operation *TFliteOpMaximum::build(const tflite::Operator *op, TFliteImport *import,
#include "Minimum.h"
#include "Convert.h"
+#include "FillerHelper.h"
namespace tflchef
{
void TFliteOpMinimum::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
- // Nothing to do with filler
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+ assert(inputs.size() == 2);
+
+ fill_tensor_to_import(inputs[0], import);
+ fill_tensor_to_import(inputs[1], import);
}
tflchef::Operation *TFliteOpMinimum::build(const tflite::Operator *op, TFliteImport *import,
for (int32_t i = 0; i < inputs.size(); i++)
{
- // Except for Input 0, 17 and 18.
- // Each Input mean Input[0](=Input Tensor), Input[17](=OutputState Tensor) and
- // Input[18](=CellState Tensor).
+ // Skip inputs 0, 18 and 19.
+ // These are Input[0](=Input Tensor), Input[18](=OutputState Tensor) and
+ // Input[19](=CellState Tensor).
// This could be updated from previous input or User Given data, so This could not be Const
- if (i == 0 || i == 17 || i == 18)
+ if (i == 0 || i == 18 || i == 19)
continue;
if (inputs[i] != -1)
fill_tensor_to_import(inputs[i], import);
#include "Op/AveragePool2D.h"
#include "Op/BatchMatMul.h"
#include "Op/BatchToSpaceND.h"
+#include "Op/BidirectionalSequenceLSTM.h"
#include "Op/Cast.h"
#include "Op/Ceil.h"
#include "Op/Concatenation.h"
#include "Op/Equal.h"
#include "Op/Exp.h"
#include "Op/ExpandDims.h"
+#include "Op/FakeQuant.h"
#include "Op/Fill.h"
#include "Op/Floor.h"
#include "Op/FloorDiv.h"
REG_TFL_OP(AVERAGE_POOL_2D, TFliteOpAveragePool2D);
REG_TFL_OP(BATCH_MATMUL, TFliteOpBatchMatMul);
REG_TFL_OP(BATCH_TO_SPACE_ND, TFliteOpBatchToSpaceND);
+ REG_TFL_OP(BIDIRECTIONAL_SEQUENCE_LSTM, TFliteOpBidirectionalSequenceLSTM);
REG_TFL_OP(CAST, TFliteOpCast);
REG_TFL_OP(CEIL, TFliteOpCeil);
REG_TFL_OP(CONCATENATION, TFliteOpConcatenation);
REG_TFL_OP(EQUAL, TFliteOpEqual);
REG_TFL_OP(EXP, TFliteOpExp);
REG_TFL_OP(EXPAND_DIMS, TFliteOpExpandDims);
+ REG_TFL_OP(FAKE_QUANT, TFliteOpFakeQuant);
REG_TFL_OP(FILL, TFliteOpFill);
REG_TFL_OP(FLOOR, TFliteOpFloor);
REG_TFL_OP(FLOOR_DIV, TFliteOpFloorDiv);
add_executable(tflchef Driver.cpp)
target_link_libraries(tflchef tflchef_core)
target_link_libraries(tflchef safemain)
+
+install(TARGETS tflchef DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(tflchef_test Driver.test.cpp Driver.cpp)
+target_link_libraries(tflchef_test tflchef_core)
#include <iostream>
-int entry(int argc, char **argv)
+int entry_stream(std::istream &is)
{
int32_t model_version = 1;
// Read a model recipe from standard input
{
- google::protobuf::io::IstreamInputStream iis{&std::cin};
+ google::protobuf::io::IstreamInputStream iis{&is};
if (!google::protobuf::TextFormat::Parse(&iis, &model_recipe))
{
std::cerr << "ERROR: Failed to parse recipe" << std::endl;
return 0;
}
+
+int entry(int, char **)
+{
+ // forward to entry_stream
+ return entry_stream(std::cin);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+// entry function to test from Driver.cpp
+int entry_stream(std::istream &is);
+
+TEST(TFlChefDriverTest, entry_empty_NEG)
+{
+ std::istringstream empty_input("");
+
+ ASSERT_EQ(0, entry_stream(empty_input));
+}
+
+TEST(TFlChefDriverTest, entry_invalid_NEG)
+{
+ std::istringstream invalid_input("invalid: input");
+
+ ASSERT_NE(0, entry_stream(invalid_input));
+}
+
+TEST(TFlChefDriverTest, entry_invalid_version_NEG)
+{
+ std::istringstream invalid_version_input("version: 9999");
+
+ ASSERT_NE(0, entry_stream(invalid_version_input));
+}
target_link_libraries(tflchef-file arser)
target_link_libraries(tflchef-file tflchef_core)
target_link_libraries(tflchef-file safemain)
+
+install(TARGETS tflchef-file DESTINATION bin)
{
arser::Arser arser;
arser.add_argument("recipe")
- .type(arser::DataType::STR)
- .help("Source recipe file path to convert");
+ .type(arser::DataType::STR)
+ .help("Source recipe file path to convert");
arser.add_argument("tflite").type(arser::DataType::STR).help("Target tflite file path");
try
if (model_version > 1)
{
- std::cerr << "ERROR: Unsupported recipe version: " << model_version << ", '" << argv[1] << "'"
- << std::endl;
+ std::cerr << "ERROR: Unsupported recipe version: " << model_version << ", '" << recipe_path
+ << "'" << std::endl;
return 255;
}
target_link_libraries(tflchef-reverse tflchef_tflite)
target_link_libraries(tflchef-reverse safemain)
target_link_libraries(tflchef-reverse foder)
+
+install(TARGETS tflchef-reverse DESTINATION bin)
{
arser::Arser arser;
arser.add_argument("tflite")
- .type(arser::DataType::STR)
- .help("Source tflite file path to convert");
+ .type(arser::DataType::STR)
+ .help("Source tflite file path to convert");
arser.add_argument("recipe").type(arser::DataType::STR).help("Target recipe file path");
try
### Dependency
- safemain
-- stdex
- FlatBuffers
}
};
+class BidirectionalSequenceLSTMPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_BidirectionalSequenceLSTMOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << "cell_clip(" << params->cell_clip() << ") ";
+ os << "proj_clip(" << params->proj_clip() << ") ";
+ os << "time_major(" << params->time_major() << ") ";
+ os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") ";
+ os << "merge_outputs(" << params->merge_outputs() << ") ";
+ os << std::endl;
+ }
+ }
+};
+
class CastPrinter : public OpPrinter
{
public:
os << "Stride.H(" << conv_params->stride_h() << ") ";
os << "DepthMultiplier(" << conv_params->depth_multiplier() << ") ";
os << "Dilation.W(" << conv_params->dilation_w_factor() << ") ";
- os << "Dilation.H(" << conv_params->dilation_h_factor() << ")";
+ os << "Dilation.H(" << conv_params->dilation_h_factor() << ") ";
os << "Activation("
<< EnumNameActivationFunctionType(conv_params->fused_activation_function()) << ") ";
os << std::endl;
}
};
+class FakeQuantPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_FakeQuantOptions())
+ {
+ os << " ";
+ os << "Min(" << params->min() << ") ";
+ os << "Max(" << params->max() << ") ";
+ os << "NumBits(" << params->num_bits() << ") ";
+ os << std::boolalpha;
+ os << "NarrowRange(" << params->narrow_range() << ") ";
+ os << std::noboolalpha;
+ os << std::endl;
+ }
+ }
+};
+
class FullyConnectedPrinter : public OpPrinter
{
public:
_op_map[tflite::BuiltinOperator_ARG_MAX] = make_unique<ArgMaxPrinter>();
_op_map[tflite::BuiltinOperator_ARG_MIN] = make_unique<ArgMinPrinter>();
_op_map[tflite::BuiltinOperator_AVERAGE_POOL_2D] = make_unique<Pool2DPrinter>();
+ _op_map[tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM] =
+ make_unique<BidirectionalSequenceLSTMPrinter>();
_op_map[tflite::BuiltinOperator_CAST] = make_unique<CastPrinter>();
// There is no Option for CEIL
_op_map[tflite::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationPrinter>();
_op_map[tflite::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique<DepthwiseConv2DPrinter>();
// There is no Option for DEQUANTIZE
_op_map[tflite::BuiltinOperator_DIV] = make_unique<DivPrinter>();
+ _op_map[tflite::BuiltinOperator_FAKE_QUANT] = make_unique<FakeQuantPrinter>();
// There is no Option for FLOOR
// There is no Option for FLOOR_MOD
_op_map[tflite::BuiltinOperator_FULLY_CONNECTED] = make_unique<FullyConnectedPrinter>();
_op_map[tflite::BuiltinOperator_L2_NORMALIZATION] = make_unique<L2NormPrinter>();
_op_map[tflite::BuiltinOperator_LEAKY_RELU] = make_unique<LeakyReluPrinter>();
_op_map[tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION] =
- make_unique<LocalResponseNormalizationPrinter>();
+ make_unique<LocalResponseNormalizationPrinter>();
// There is no Option for LOG
// There is no Option for LOGISTIC
// There is no Option for LOG_SOFTMAX
_op_map[tflite::BuiltinOperator_RESHAPE] = make_unique<ReshapePrinter>();
_op_map[tflite::BuiltinOperator_RESIZE_BILINEAR] = make_unique<ResizeBilinearPrinter>();
_op_map[tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR] =
- make_unique<ResizeNearestNeighborPrinter>();
+ make_unique<ResizeNearestNeighborPrinter>();
_op_map[tflite::BuiltinOperator_REVERSE_SEQUENCE] = make_unique<ReverseSequencePrinter>();
// There is no Option for ROUND
// There is no Option for SELECT
_op_map[tflite::BuiltinOperator_TRANSPOSE_CONV] = make_unique<TransposeConvPrinter>();
// There is no Option for TOPK_V2
_op_map[tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM] =
- make_unique<UnidirectionalSequenceLSTMPrinter>();
+ make_unique<UnidirectionalSequenceLSTMPrinter>();
_op_map[tflite::BuiltinOperator_UNIQUE] = make_unique<UniquePrinter>();
_op_map[tflite::BuiltinOperator_WHILE] = make_unique<WhilePrinter>();
_op_map[tflite::BuiltinOperator_CUSTOM] = make_unique<CustomOpPrinter>();
target_link_libraries(tflite2circle mio_tflite)
target_link_libraries(tflite2circle mio_circle)
target_link_libraries(tflite2circle vconone)
+target_link_libraries(tflite2circle nncc_coverage)
install(TARGETS tflite2circle DESTINATION bin)
arser::Arser arser{"tflite2circle is a Tensorflow lite to circle model converter"};
arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
arser.add_argument("tflite")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Source tflite file path to convert");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Source tflite file path to convert");
arser.add_argument("circle").nargs(1).type(arser::DataType::STR).help("Target circle file path");
try
#include "BuildBuiltinOptions/ArgMinOptions.h"
#include "BuildBuiltinOptions/BatchMatMulOptions.h"
#include "BuildBuiltinOptions/BatchToSpaceNDOptions.h"
+#include "BuildBuiltinOptions/BidirectionalSequenceLSTMOptions.h"
#include "BuildBuiltinOptions/CastOptions.h"
#include "BuildBuiltinOptions/ConcatenationOptions.h"
#include "BuildBuiltinOptions/Conv2DOptions.h"
#include "BuildBuiltinOptions/EqualOptions.h"
#include "BuildBuiltinOptions/ExpandDimsOptions.h"
#include "BuildBuiltinOptions/ExpOptions.h"
+#include "BuildBuiltinOptions/FakeQuantOptions.h"
#include "BuildBuiltinOptions/FillOptions.h"
#include "BuildBuiltinOptions/FloorDivOptions.h"
#include "BuildBuiltinOptions/FloorModOptions.h"
assert(tflite_builtin_options);
circle::AddOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
assert(tflite_builtin_options);
circle::ArgMaxOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_output_type(
- get_circle_tensortype(tflite_builtin_options->output_type()));
+ get_circle_tensortype(tflite_builtin_options->output_type()));
return builtin_options_builder.Finish();
}
assert(tflite_builtin_options);
circle::ArgMinOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_output_type(
- get_circle_tensortype(tflite_builtin_options->output_type()));
+ get_circle_tensortype(tflite_builtin_options->output_type()));
return builtin_options_builder.Finish();
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BidirectionalSequenceLSTMOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::BidirectionalSequenceLSTMOptions>
+build_circle_BidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_BidirectionalSequenceLSTMOptions();
+ circle::BidirectionalSequenceLSTMOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ builtin_options_builder.add_cell_clip(tflite_builtin_options->cell_clip());
+ builtin_options_builder.add_proj_clip(tflite_builtin_options->proj_clip());
+ builtin_options_builder.add_time_major(tflite_builtin_options->time_major());
+ builtin_options_builder.add_merge_outputs(tflite_builtin_options->merge_outputs());
+ builtin_options_builder.add_asymmetric_quantize_inputs(
+ tflite_builtin_options->asymmetric_quantize_inputs());
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_BIDIRECTIONALSEQUENCE_LSTM_OPTIONS_H__
+#define __BBO_BIDIRECTIONALSEQUENCE_LSTM_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::BidirectionalSequenceLSTMOptions>
+build_circle_BidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_BIDIRECTIONALSEQUENCE_LSTM_OPTIONS_H__
circle::CastOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_in_data_type(
- get_circle_tensortype(tflite_builtin_options->in_data_type()));
+ get_circle_tensortype(tflite_builtin_options->in_data_type()));
builtin_options_builder.add_out_data_type(
- get_circle_tensortype(tflite_builtin_options->out_data_type()));
+ get_circle_tensortype(tflite_builtin_options->out_data_type()));
return builtin_options_builder.Finish();
}
circle::ConcatenationOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_axis(tflite_builtin_options->axis());
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
builtin_options_builder.add_stride_w(tflite_builtin_options->stride_w());
builtin_options_builder.add_stride_h(tflite_builtin_options->stride_h());
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
builtin_options_builder.add_dilation_w_factor(tflite_builtin_options->dilation_w_factor());
builtin_options_builder.add_dilation_h_factor(tflite_builtin_options->dilation_h_factor());
return builtin_options_builder.Finish();
builtin_options_builder.add_stride_h(tflite_builtin_options->stride_h());
builtin_options_builder.add_depth_multiplier(tflite_builtin_options->depth_multiplier());
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
builtin_options_builder.add_dilation_w_factor(tflite_builtin_options->dilation_w_factor());
builtin_options_builder.add_dilation_h_factor(tflite_builtin_options->dilation_h_factor());
return builtin_options_builder.Finish();
assert(tflite_builtin_options);
circle::DivOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FillOptions.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::FakeQuantOptions>
+build_circle_FakeQuantOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_FakeQuantOptions();
+ assert(tflite_builtin_options);
+ circle::FakeQuantOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_min(tflite_builtin_options->min());
+ builtin_options_builder.add_max(tflite_builtin_options->max());
+ builtin_options_builder.add_num_bits(tflite_builtin_options->num_bits());
+ builtin_options_builder.add_narrow_range(tflite_builtin_options->narrow_range());
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_FAKEQUANT_OPTIONS_H__
+#define __BBO_FAKEQUANT_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::FakeQuantOptions>
+build_circle_FakeQuantOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_FAKEQUANT_OPTIONS_H__
assert(tflite_builtin_options);
circle::FullyConnectedOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
// Get FullyConnectedOptionsWeightsFormat
auto tflite_weight_format = tflite_builtin_options->weights_format();
if (tflite_weight_format == tflite::FullyConnectedOptionsWeightsFormat_DEFAULT)
builtin_options_builder.add_weights_format(circle::FullyConnectedOptionsWeightsFormat_DEFAULT);
else if (tflite_weight_format == tflite::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8)
builtin_options_builder.add_weights_format(
- circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8);
+ circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8);
return builtin_options_builder.Finish();
}
assert(tflite_builtin_options);
circle::L2NormOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
assert(tflite_builtin_options);
circle::MulOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
builtin_options_builder.add_filter_width(tflite_builtin_options->filter_width());
builtin_options_builder.add_filter_height(tflite_builtin_options->filter_height());
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
assert(tflite_builtin_options);
circle::SubOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
auto tflite_builtin_options = op->builtin_options_as_UnidirectionalSequenceLSTMOptions();
circle::UnidirectionalSequenceLSTMOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
builtin_options_builder.add_cell_clip(tflite_builtin_options->cell_clip());
builtin_options_builder.add_proj_clip(tflite_builtin_options->proj_clip());
builtin_options_builder.add_time_major(tflite_builtin_options->time_major());
builtin_options_builder.add_asymmetric_quantize_inputs(
- tflite_builtin_options->asymmetric_quantize_inputs());
+ tflite_builtin_options->asymmetric_quantize_inputs());
return builtin_options_builder.Finish();
}
assert(tflite_builtin_options);
circle::UniqueOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_idx_out_type(
- get_circle_tensortype(tflite_builtin_options->idx_out_type()));
+ get_circle_tensortype(tflite_builtin_options->idx_out_type()));
return builtin_options_builder.Finish();
}
flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order;
flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map;
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
- dim_metadata;
+ dim_metadata;
// traversal_order
if (it->sparsity()->traversal_order())
{
auto traversal_order_vec = std::vector<int32_t>{
- it->sparsity()->traversal_order()->begin(), it->sparsity()->traversal_order()->end()};
+ it->sparsity()->traversal_order()->begin(), it->sparsity()->traversal_order()->end()};
traversal_order = fb->CreateVector(traversal_order_vec);
}
// array_segments
auto tflite_array_segments_type = it->array_segments_type();
auto circle_array_segments =
- get_circle_sparse_index_vector(*fb, it->array_segments(), tflite_array_segments_type);
+ get_circle_sparse_index_vector(*fb, it->array_segments(), tflite_array_segments_type);
auto circle_array_segments_type =
- get_circle_sparse_index_vector_type(tflite_array_segments_type);
+ get_circle_sparse_index_vector_type(tflite_array_segments_type);
// array_indices
auto tflite_array_indices_type = it->array_indices_type();
auto circle_array_indices =
- get_circle_sparse_index_vector(*fb, it->array_indices(), tflite_array_indices_type);
+ get_circle_sparse_index_vector(*fb, it->array_indices(), tflite_array_indices_type);
auto circle_array_indices_type =
- get_circle_sparse_index_vector_type(tflite_array_indices_type);
+ get_circle_sparse_index_vector_type(tflite_array_indices_type);
auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*fb};
if (it->shape_signature())
{
auto shape_signature_vec =
- std::vector<int32_t>({it->shape_signature()->begin(), it->shape_signature()->end()});
+ std::vector<int32_t>({it->shape_signature()->begin(), it->shape_signature()->end()});
shape_signature = fb->CreateVector(shape_signature_vec);
}
}
CircleModel::CircleModel(FlatBufBuilder &fb, TFLModel &model)
- : _version{0}, _description{fb->CreateString("nnpackage")}, _fb{fb}
+ : _version{0}, _description{fb->CreateString("nnpackage")}, _fb{fb}
{
const tflite::Model *tfl_model = model.load_model();
// verify flatbuffers
}
_operator_codes_offset =
- std::make_unique<Offset<OperatorCodeLink>>(fb, tfl_model->operator_codes());
+ std::make_unique<Offset<OperatorCodeLink>>(fb, tfl_model->operator_codes());
_subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb, tfl_model->subgraphs());
_buffers_offset = std::make_unique<Offset<BufferLink>>(fb, tfl_model->buffers());
_metadata_buffer_offset =
- std::make_unique<Offset<MetaDataBufferLink>>(fb, tfl_model->metadata_buffer());
+ std::make_unique<Offset<MetaDataBufferLink>>(fb, tfl_model->metadata_buffer());
model_build();
}
{
const tflite::Int32Vector *i32_array = static_cast<const tflite::Int32Vector *>(v_array);
auto values_vec_int32 =
- std::vector<int32_t>{i32_array->values()->begin(), i32_array->values()->end()};
+ std::vector<int32_t>{i32_array->values()->begin(), i32_array->values()->end()};
auto values_int32 = fb.CreateVector(values_vec_int32);
circle::Int32VectorBuilder int32_vector_builder{fb};
int32_vector_builder.add_values(values_int32);
{
const tflite::Uint16Vector *u16_array = static_cast<const tflite::Uint16Vector *>(v_array);
auto values_vec_uint16 =
- std::vector<uint16_t>{u16_array->values()->begin(), u16_array->values()->end()};
+ std::vector<uint16_t>{u16_array->values()->begin(), u16_array->values()->end()};
auto values_uint16 = fb.CreateVector(values_vec_uint16);
circle::Uint16VectorBuilder uint16_vector_builder{fb};
uint16_vector_builder.add_values(values_uint16);
{
const tflite::Uint8Vector *u8_array = static_cast<const tflite::Uint8Vector *>(v_array);
auto values_vec_uint8 =
- std::vector<uint8_t>{u8_array->values()->begin(), u8_array->values()->end()};
+ std::vector<uint8_t>{u8_array->values()->begin(), u8_array->values()->end()};
auto values_uint8 = fb.CreateVector(values_vec_uint8);
circle::Uint8VectorBuilder uint8_vector_builder{fb};
uint8_vector_builder.add_values(values_uint8);
* @brief Returns circle builtin_code according to tflite.
*
* @note You can see a list of currently supported BuiltinOperator in TFLOperator.lst file.
-*/
+ */
circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop);
/**
* @brief Returns circle TensorType according to tflite.
*
* @note You can see a list of currently supported TensorType in TFLTensorType.lst file.
-*/
+ */
circle::TensorType get_circle_tensortype(tflite::TensorType tfl_tt);
/**
* @brief Returns circle Padding enum according to tflite.
-*/
+ */
circle::Padding get_circle_padding(tflite::Padding tfl_p);
/**
*
* @note You can see a list of currently supported ActivationFunctionType in
* TFLActivationFunctionType.lst file.
-*/
+ */
circle::ActivationFunctionType
get_circle_activation_function_type(tflite::ActivationFunctionType tfl_aft);
* This function calls the build_circle_##BuiltinOptions internally (e.g.
* build_circle_AbsOptions, build_circle_AddOptions, etc.), so refer to it for a more
* detailed implementation.
-*/
+ */
flatbuffers::Offset<void> get_circle_builtin_options(flatbuffers::FlatBufferBuilder &fb,
const tflite::Operator *op);
* @brief Returns circle builtin_options_type according to tflite.
*
* @note You can see a list of currently supported BuiltinOptions in TFLBuiltinOptions.lst file.
-*/
+ */
circle::BuiltinOptions get_circle_builtin_options_type(const tflite::Operator *op);
/**
* @brief Returns circle MirrorPadMode according to tflite.
-*/
+ */
circle::MirrorPadMode get_circle_mirrorpad_mode(tflite::MirrorPadMode tfl_mode);
/**
* @brief Returns circle DimensionType according to tflite.
-*/
+ */
circle::DimensionType get_circle_dimension_type(tflite::DimensionType tfl_dim_type);
/**
* @brief Returns circle SparseIndexVector according to tflite.
-*/
+ */
flatbuffers::Offset<void>
get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb, const void *values,
const tflite::SparseIndexVector &tfl_sparse_index_vector_type);
/**
* @brief Returns circle SparseIndexVector type according to tflite.
-*/
+ */
circle::SparseIndexVector
get_circle_sparse_index_vector_type(const tflite::SparseIndexVector &tfl_sparse_index_vector_type);
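Taken together, the helpers declared above map each tflite enum or option payload onto its circle counterpart. A minimal sketch of how they chain (illustrative only, not the repository's actual conversion code; it assumes the flatbuffers-generated circle::OperatorBuilder is in scope and skips opcode index, inputs and outputs):

// Hypothetical sketch: rebuild the option payload of a single operator.
flatbuffers::Offset<circle::Operator> convert_options_sketch(flatbuffers::FlatBufferBuilder &fb,
                                                             const tflite::Operator *op)
{
  // The payload and its discriminating union type come from separate helpers
  // and must describe the same BuiltinOptions member.
  auto circle_options = get_circle_builtin_options(fb, op);
  auto circle_options_type = get_circle_builtin_options_type(op);

  circle::OperatorBuilder operator_builder{fb};
  operator_builder.add_builtin_options(circle_options);
  operator_builder.add_builtin_options_type(circle_options_type);
  return operator_builder.Finish();
}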
TFL_BUILTIN_OPTIONS(ShapeOptions)
TFL_BUILTIN_OPTIONS(PowOptions)
TFL_BUILTIN_OPTIONS(ArgMinOptions)
-//TFL_BUILTIN_OPTIONS(FakeQuantOptions)
+TFL_BUILTIN_OPTIONS(FakeQuantOptions)
TFL_BUILTIN_OPTIONS(PackOptions)
TFL_BUILTIN_OPTIONS(LogicalOrOptions)
TFL_BUILTIN_OPTIONS(OneHotOptions)
TFL_BUILTIN_OPTIONS(SquareOptions)
TFL_BUILTIN_OPTIONS(ZerosLikeOptions)
TFL_BUILTIN_OPTIONS(FillOptions)
-//TFL_BUILTIN_OPTIONS(BidirectionalSequenceLSTMOptions)
+TFL_BUILTIN_OPTIONS(BidirectionalSequenceLSTMOptions)
//TFL_BUILTIN_OPTIONS(BidirectionalSequenceRNNOptions)
TFL_BUILTIN_OPTIONS(UnidirectionalSequenceLSTMOptions)
TFL_BUILTIN_OPTIONS(FloorModOptions)
if (NOT VCONONE_VERSION)
- set(VCONONE_VERSION 0x00000000000c0001)
+ set(VCONONE_VERSION 0x00000000000f0001)
# NOTE order is [build patch minor major]
# if VCONONE_VERSION is set with -D option, it will be cached
# you may have to remove cache file if you remove -D option
std::string get_copyright(void)
{
std::string str;
- str = "Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved\r\n";
+ str = "Copyright (c) 2020-2021 Samsung Electronics Co., Ltd. All Rights Reserved\r\n";
str += "Licensed under the Apache License, Version 2.0\r\n";
str += "https://github.com/Samsung/ONE";
return str;
+++ /dev/null
-../.clang-format.8
\ No newline at end of file
_program_source_map; /**< Contains sources for all programs.
Used for compile-time kernel inclusion. >*/
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ */
#ifndef ARM_COMPUTE_CLARGMINMAXLAYERKERNELEX_H
#define ARM_COMPUTE_CLARGMINMAXLAYERKERNELEX_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
#ifndef __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__
#define __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/TypesEx.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_CLCASTBOOLKERNEL_H__
#define __ARM_COMPUTE_CLCASTBOOLKERNEL_H__
-#include "arm_compute/core/CL/ICLSimple3DKernel.h"
+#include "src/core/CL/ICLSimple3DKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__
#define __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_CLGEMMMatrixAccumulateBiasesKernel_H
+#define ARM_COMPUTE_CLGEMMMatrixAccumulateBiasesKernel_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+/** Interface to add a bias to each row of the input tensor
+ *
+ */
+class CLGEMMMatrixAccumulateBiasesKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLGEMMMatrixAccumulateBiasesKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixAccumulateBiasesKernel(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixAccumulateBiasesKernel &
+ operator=(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixAccumulateBiasesKernel(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixAccumulateBiasesKernel &operator=(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
+ /** Set the accumulate buffer and the biases of the kernel.
+ *
+ * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32
+ * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types
+ * supported: Same as @p input
+ */
+ void configure(ICLTensor *accum, const ICLTensor *biases);
+ /** Set the accumulate buffer and the biases of the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32
+ * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data
+ * types supported: Same as @p input
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *accum,
+ const ICLTensor *biases);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLGEMMMatrixAccumulateBiasesKernel
+ *
+ * @param[in] accum The accumulate tensor to convert. Data types supported: F16/F32
+ * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types
+ * supported: Same as @p input
+ * @param[in] gpu_target GPU target
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *accum, const ITensorInfo *biases, GPUTarget gpu_target);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_accum;
+ const ICLTensor *_biases;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMMatrixAccumulateBiasesKernel_H */
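A hedged usage sketch of the kernel above, assuming the usual ACL setup (an initialised CLScheduler plus an F32 accumulator and a 1D bias tensor that are already configured and allocated); the helper below is illustrative and not part of the patch:

#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" // added by this patch
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

// Hypothetical sketch: append shared biases to an accumulator on the GPU.
void accumulate_biases_cl_sketch(arm_compute::CLTensor &accum, arm_compute::CLTensor &biases)
{
  // validate() inspects the tensor infos before any OpenCL state is created.
  auto status = arm_compute::CLGEMMMatrixAccumulateBiasesKernel::validate(
    accum.info(), biases.info(), arm_compute::CLScheduler::get().target());
  if (bool(status))
  {
    arm_compute::CLGEMMMatrixAccumulateBiasesKernel bias_kernel;
    bias_kernel.configure(&accum, &biases);
    arm_compute::CLScheduler::get().enqueue(bias_kernel);
    arm_compute::CLScheduler::get().sync(); // keep the local kernel alive until it finishes
  }
}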
#ifndef __ARM_COMPUTE_CLGATHEREXKERNEL_H__
#define __ARM_COMPUTE_CLGATHEREXKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__
#define __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
namespace arm_compute
#ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__
#define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLMEMSETKERNEL_H
+#define ARM_COMPUTE_CLMEMSETKERNEL_H
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for filling the planes of a tensor */
+class CLMemsetKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLMemsetKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMemsetKernel(const CLMemsetKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMemsetKernel &operator=(const CLMemsetKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLMemsetKernel(CLMemsetKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLMemsetKernel &operator=(CLMemsetKernel &&) = default;
+ /** Default destructor */
+ ~CLMemsetKernel() = default;
+
+ /** Initialise the kernel's tensor and filling value
+ *
+ * @param[in,out] tensor Input tensor to fill. Supported data types: All.
+ * @param[in] constant_value The value used to fill the planes of the tensor
+ * @param[in] window Window to be used in case setting only part of a tensor. Default
+ * is nullptr.
+ */
+ void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
+ /** Initialise the kernel's tensor and filling value
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] tensor Input tensor to fill. Supported data types: All.
+ * @param[in] constant_value The value used to fill the planes of the tensor
+ * @param[in] window Window to be used in case setting only part of a tensor. Default
+ * is nullptr.
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *tensor,
+ const PixelValue &constant_value, Window *window = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLMemsetKernel
+ *
+ * @param[in] tensor Source tensor info. Data types supported: All.
+ * @param[in] constant_value The value used to fill the planes of the tensor
+ * @param[in] window Window to be used in case setting only part of a tensor. Default is
+ * nullptr.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value,
+ Window *window = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_tensor;
+ Window _full_window;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLMEMSETKERNEL_H */
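A minimal sketch of the memset API above (illustrative, not part of the patch; the tensor is assumed to be configured and allocated, and the CLScheduler initialised). Passing no window fills the whole tensor, and a default-constructed PixelValue is zero:

#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" // added by this patch
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

// Hypothetical sketch: zero-fill an entire tensor.
void memset_cl_sketch(arm_compute::CLTensor &tensor)
{
  arm_compute::CLMemsetKernel memset_kernel;
  memset_kernel.configure(&tensor, arm_compute::PixelValue()); // no window: fill the whole tensor
  arm_compute::CLScheduler::get().enqueue(memset_kernel);
  arm_compute::CLScheduler::get().sync();
}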
#ifndef __ARM_COMPUTE_CLMULTIPLYSCALEFACTORKERNEL_H__
#define __ARM_COMPUTE_CLMULTIPLYSCALEFACTORKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_CLNEGKERNEL_H__
#define __ARM_COMPUTE_CLNEGKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
*/
#ifndef __ARM_COMPUTE_CLONEHOTKERNEL_H__
#define __ARM_COMPUTE_CLONEHOTKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
{
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLPADLAYERKERNELEX_H
+#define ARM_COMPUTE_CLPADLAYERKERNELEX_H
+
+#include "src/core/CL/ICLKernel.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the PadLayer function. */
+class CLPadLayerKernelEx : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLPadLayerKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerKernelEx(const CLPadLayerKernelEx &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerKernelEx &operator=(const CLPadLayerKernelEx &) = delete;
+ /** Allow instances of this class to be moved */
+ CLPadLayerKernelEx(CLPadLayerKernelEx &&) = default;
+ /** Allow instances of this class to be moved */
+ CLPadLayerKernelEx &operator=(CLPadLayerKernelEx &&) = default;
+ /** Default destructor */
+ ~CLPadLayerKernelEx() = default;
+ /** Set the input and output tensor.
+ *
+ * @param[in] input Source tensor. Data types supported: U8, S8, QASYMM8,
+ * QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair
+ * padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+ /** Set the input and output tensor.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The
+ * pair padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output,
+ const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLPadLayerKernelEx
+ *
+ * @param[in] input Source tensor info. Data types supported: U8, S8, QASYMM8,
+ * QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32.
+ * @param[in] output Output tensor info. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair
+ * padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ int _input_start_x;
+ int _input_start_y;
+ bool _4d_enabled;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLPADLAYERKERNELEX_H */
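The padding argument follows ACL's PaddingList convention: one {front, end} pair per tensor dimension. A small sketch of the validate() entry point (illustrative, not part of the patch; the input and output tensor infos are assumed to exist):

#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h" // added by this patch

// Hypothetical sketch: would this pad configuration be accepted?
arm_compute::Status check_pad_sketch(const arm_compute::ITensorInfo *input,
                                     const arm_compute::ITensorInfo *output)
{
  arm_compute::PaddingList padding{{1, 1}, {2, 2}}; // dim0: 1 front / 1 end, dim1: 2 front / 2 end
  return arm_compute::CLPadLayerKernelEx::validate(input, output, padding); // CONSTANT, zero value
}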
#ifndef __ARM_COMPUTE_CLQUANTIZATIONSYMMETRICKERNEL_H__
#define __ARM_COMPUTE_CLQUANTIZATIONSYMMETRICKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__
#define __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
#ifndef __ARM_COMPUTE_CLSCALEFACTORSYMM8KERNEL_H__
#define __ARM_COMPUTE_CLSCALEFACTORSYMM8KERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_CLTOPKV2KERNEL_H__
#define __ARM_COMPUTE_CLTOPKV2KERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
// these parameters can be changed
#define _ITEMS 16 // number of items in a group
#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__
#define __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__
-#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h"
#include "arm_compute/core/TypesEx.h"
+#include "src/core/cpu/kernels/CpuElementwiseKernel.h"
+
namespace arm_compute
{
-class NEBinaryLogicalOperationKernel : public NEElementwiseOperationKernel
+class NEBinaryLogicalOperationKernel : public cpu::kernels::CpuComparisonKernel
{
public:
+ const char *name() const override { return "NEBinaryLogicalOperationKernel"; }
+
+ NEBinaryLogicalOperationKernel() = default;
/** Default destructor */
~NEBinaryLogicalOperationKernel() = default;
// Inherited methods overridden:
static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
const ITensorInfo &output);
+
+ std::function<void(const ITensor *input1, const ITensor *input2, ITensor *output,
+ const Window &window)>
+ _function;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__ */
#ifndef __ARM_COMPUTE_NECASTBOOLKERNEL_H__
#define __ARM_COMPUTE_NECASTBOOLKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__
#define __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H
+#define ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+/** NEON kernel to add a bias to each row of the input tensor */
+class NEGEMMMatrixAccumulateBiasesKernel : public INEKernel
+{
+public:
+ const char *name() const override { return "NEGEMMMatrixAccumulateBiasesKernel"; }
+ /** Default constructor */
+ NEGEMMMatrixAccumulateBiasesKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMMatrixAccumulateBiasesKernel(const NEGEMMMatrixAccumulateBiasesKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMMatrixAccumulateBiasesKernel &
+ operator=(const NEGEMMMatrixAccumulateBiasesKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMMatrixAccumulateBiasesKernel(NEGEMMMatrixAccumulateBiasesKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMMatrixAccumulateBiasesKernel &operator=(NEGEMMMatrixAccumulateBiasesKernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMMatrixAccumulateBiasesKernel() = default;
+ /** Set the accumulate buffer and the biases of the kernel.
+ *
+ * @param[in, out] accum The accumulate tensor to convert. Data type supported: F32
+ * @param[in] biases The shared biases tensor to append. It must be 1D Tensor. Data type
+ * supported: Same as @p input
+ */
+ void configure(ITensor *accum, const ITensor *biases);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEGEMMMatrixAccumulateBiasesKernel
+ *
+ * @param[in] accum The accumulate tensor to convert. Data type supported: F32
+ * @param[in] biases The shared biases tensor to append. It must be 1D Tensor. Data type
+ * supported: Same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *accum, const ITensorInfo *biases);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ ITensor *_accum;
+ const ITensor *_biases;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H */
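The NEON variant mirrors the CL kernel but is dispatched through the CPU scheduler. A hedged sketch, assuming already-configured and allocated F32 tensors (illustrative names, not part of the patch):

#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h" // added by this patch
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

// Hypothetical sketch: append shared biases to an accumulator on the CPU.
void accumulate_biases_neon_sketch(arm_compute::Tensor &accum, const arm_compute::Tensor &biases)
{
  arm_compute::NEGEMMMatrixAccumulateBiasesKernel bias_kernel;
  bias_kernel.configure(&accum, &biases);
  // NEON kernels are run through the scheduler, split along one window dimension.
  arm_compute::NEScheduler::get().schedule(&bias_kernel, arm_compute::Window::DimY);
}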
#ifndef __ARM_COMPUTE_NEGATHERKERNELEX_H__
#define __ARM_COMPUTE_NEGATHERKERNELEX_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
#ifndef __ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__
#define __ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
#ifndef __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__
#define __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__
#define __ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
namespace arm_compute
{
*/
#ifndef __ARM_COMPUTE_NEONEHOTKERNEL_H__
#define __ARM_COMPUTE_NEONEHOTKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__
#define __ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
namespace arm_compute
{
unsigned int kernel_width, unsigned int kernel_height,
const PadStrideInfo &info, unsigned int invalid_right,
unsigned int invalid_top);
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_UTILSEX_H__ */
#include <arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLNeg.h>
#include <arm_compute/runtime/CL/functions/CLOneHot.h>
+#include <arm_compute/runtime/CL/functions/CLPadLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLReduceOperation.h>
#include <arm_compute/runtime/CL/functions/CLSplitVEx.h>
#include <arm_compute/runtime/CL/functions/CLTopKV2.h>
#define __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__
#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h"
-#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h"
+
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
std::vector<CLTensor> _results_vector;
CLTensor _not_reshaped_output;
std::vector<CLArgMinMaxLayerKernelEx> _reduction_kernels_vector;
- CLReshapeLayerKernel _reshape_kernel;
+ CLReshapeLayer _reshape_kernel;
unsigned int _num_of_stages;
unsigned int _reduction_axis;
};
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/core/TypesEx.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
namespace arm_compute
{
*/
void configure(ICLTensor *input, ICLTensor *output);
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_CLCASTBOOL_H */
*/
void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ */
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h"
#include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h"
#include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
-#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
namespace arm_compute
{
bool _is_prepared;
const ICLTensor *_original_weights;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__ */
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
-#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
namespace arm_compute
{
* transpose_weights is set to true ) (called once)
* -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized
* asymmetric)
- * -# @ref CLGEMMMatrixAccumulateBiasesKernel or @ref
- * CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is
- * not equal to nullptr)
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
* @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. The weights must be 2 dimensional.
* If this function is called after a Convolution Layer, the (transposed)
- * weights will have as many rows as the product of the first 3 input's dimensions.
- * If it is called after another FullyConnected Layer, the (transposed)
- * weights will have as many rows as the input's first dimension.
- * Data type supported: Same as @p input.
+ * weights will have as many rows as the product of the first 3 input's dimensions. If it is
+ * called after another FullyConnected Layer, the (transposed) weights will have as many rows as
+ * the input's first dimension. Data type supported: Same as @p input.
* @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input.
* @param[out] output Destination tensor. Its shape should be equal to the output of a matrix
* multiplication between:
* - The output of im2col on the input and the (transposed) 2D weights, if the
* function is called after a Convolution Layer
* - The input tensor and the (transposed) 2D weights, if the function is
- * called after another FullyConnected Layer.
- * Data type supported: Same as @p input.
+ * called after another FullyConnected Layer. Data type supported: Same as @p input.
* @param[in] fc_info (Optional) Fully connected layer additional info
*/
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,
ICLTensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLFullyConnectedLayerEx
+ * CLFullyConnectedLayer
*
* @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor info. The weights must be 2 dimensional.
* If this function is called after a Convolution Layer, the (transposed)
- * weights will have as many rows as the product of the first 3 input's dimensions.
- * If it is called after another FullyConnected Layer, the (transposed)
- * weights will have as many rows as the input's first dimension.
- * Data type supported: Same as @p input.
+ * weights will have as many rows as the product of the first 3 input's dimensions. If it is
+ * called after another FullyConnected Layer, the (transposed) weights will have as many rows as
+ * the input's first dimension. Data type supported: Same as @p input.
* @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input.
* @param[out] output Destination tensor info. Its shape should be equal to the output of a
* matrix multiplication between:
* - The output of im2col on the input and the (transposed) 2D weights, if the
* function is called after a Convolution Layer
* - The input tensor and the (transposed) 2D weights, if the function is
- * called after another FullyConnected Layer.
- * Data type supported: Same as @p input.
+ * called after another FullyConnected Layer. Data type supported: Same as @p input.
* @param[in] fc_info (Optional) Fully connected layer additional info
*
* @return a status
#ifndef __ARM_COMPUTE_CLGATHEREX_H__
#define __ARM_COMPUTE_CLGATHEREX_H__
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/**
* @brief Class to run @ref CLGatherKernel.
static Status validate(const ITensorInfo *input, const ITensorInfo *indices,
const ITensorInfo *output, int axis = 0);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLGATHEREX_H__ */
void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input,
ICLTensor *output, ICLTensor *hits);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUP_H__ */
#ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__
#define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to perform an Instance normalization.
*
*/
#ifndef __ARM_COMPUTE_CLONEHOT_H__
#define __ARM_COMPUTE_CLONEHOT_H__
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+
#include "arm_compute/core/CL/kernels/CLOneHotKernel.h"
+#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/runtime/IFunction.h"
+
namespace arm_compute
{
class ICLTensor;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLPADLAYEREX_H
+#define ARM_COMPUTE_CLPADLAYEREX_H
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h"
+#include "src/core/gpu/cl/kernels/ClCopyKernel.h"
+// #include "arm_compute/runtime/CL/functions/CLCopy.h"
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to pad a tensor. This function calls the following OpenCL functions/kernels:
+ *
+ * -# @ref CLPadLayerKernelEx if there is padding to be added
+ * -# @ref CLCopyKernel otherwise
+ */
+class CLPadLayerEx : public IFunction
+{
+public:
+ /** Default constructor */
+ CLPadLayerEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerEx(const CLPadLayerEx &) = delete;
+ /** Default move constructor */
+ CLPadLayerEx(CLPadLayerEx &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerEx &operator=(const CLPadLayerEx &) = delete;
+ /** Default move assignment operator */
+ CLPadLayerEx &operator=(CLPadLayerEx &&) = default;
+
+ /** Initialize the function
+ *
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair
+ * padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+ /** Initialize the function
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The
+ * pair padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output,
+ const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLPadLayerEx.
+ *
+ * @param[in] input Source tensor info. Data types supported: All.
+ * @param[in] output Output tensor info. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair
+ * padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ void configure_reflect_mode(ICLTensor *input, ICLTensor *output);
+
+ std::unique_ptr<CLPadLayerKernelEx> _pad_kernel;
+ std::unique_ptr<opencl::kernels::ClCopyKernel> _copy_kernel;
+ bool _perform_pad;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLPADLAYEREX_H */
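At the function level, CLPadLayerEx uses CLPadLayerKernelEx when any padding is requested and falls back to a plain copy otherwise. A usage sketch (illustrative, not part of the patch; the output tensor is assumed to be configured with the padded shape):

#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h" // added by this patch
#include "arm_compute/runtime/CL/CLTensor.h"

// Hypothetical sketch: constant-pad the second dimension by one element on each side.
void pad_cl_sketch(arm_compute::CLTensor &input, arm_compute::CLTensor &output)
{
  arm_compute::CLPadLayerEx pad;
  arm_compute::PaddingList padding{{0, 0}, {1, 1}};
  pad.configure(&input, &output, padding); // CONSTANT mode with a zero PixelValue by default
  pad.run();
}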
std::unique_ptr<CLReduceOperationKernel[]> _reduce_kernels{nullptr};
CLReshapeLayer _reshape;
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLREDUCEOPERATION_H__ */
#include <vector>
#include <memory>
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/runtime/CPP/functions/CPPSplit.h"
+
namespace arm_compute
{
class ICLTensor;
unsigned int _num_splits;
std::vector<CLSlice> _slice_functions;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLSPLITVEX__ */
CLTopKV2Store _store_kernel;
#endif
};
-}
+} // namespace arm_compute
#endif // __ARM_COMPUTE_CLTOPK_V2_H__
#ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__
#define __ARM_COMPUTE_NEFUNCTIONSEX_H__
-#include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
#include <arm_compute/runtime/NEON/functions/NECastBool.h>
#include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
#define __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/TypesEx.h"
#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/core/ITensorInfo.h"
namespace arm_compute
{
#define __ARM_COMPUTE_NECASTBOOL_H__
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/**
- * @brief Class to run @ref NECastBoolKernel.
+ * @brief Class to run @ref INESimpleFunctionNoBorder.
*/
-class NECastBool : public INESimpleFunction
+class NECastBool : public INESimpleFunctionNoBorder
{
public:
/** Initialize the function's source, destination
#define __ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/core/Error.h"
#include <vector>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/**
* @brief Class to perform EmbeddingLookup operation
static Status validate(const ITensorInfo *input, const ITensorInfo *output,
const ITensorInfo *lookups);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__ */
#include "arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h"
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/runtime/Tensor.h"
+#include "src/core/NEON/kernels/NETransposeKernel.h"
namespace arm_compute
{
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
-#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
+#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
+#include "src/core/NEON/kernels/NETransposeKernel.h"
namespace arm_compute
{
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFullyConnectedLayerEx(const NEFullyConnectedLayerEx &) = delete;
/** Default move constructor */
- NEFullyConnectedLayerEx(NEFullyConnectedLayerEx &&) = default;
+ NEFullyConnectedLayerEx(NEFullyConnectedLayerEx &&) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFullyConnectedLayerEx &operator=(const NEFullyConnectedLayerEx &) = delete;
/** Default move assignment operator */
- NEFullyConnectedLayerEx &operator=(NEFullyConnectedLayerEx &&) = default;
+ NEFullyConnectedLayerEx &operator=(NEFullyConnectedLayerEx &&) = delete;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
MemoryGroup _memory_group;
- NEFlattenLayerKernel _flatten_kernel;
+ NEFlattenLayer _flatten_kernel;
NEConvertFullyConnectedWeights _convert_weights;
NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
NEGEMM _mm_gemm;
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEGatherKernelEx */
class NEGatherEx : public INESimpleFunctionNoBorder
#define __ARM_COMPUTE_NEHASHTABLELOOKUP_H__
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/core/Error.h"
#include <vector>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/**
* @brief Class to perform HashtableLookup operation
const ITensorInfo *input, const ITensorInfo *output,
const ITensorInfo *hits);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEHASHTABLELOOKUP_H__ */
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to perform an Instance normalization.
*
Tensor _permuted_input;
Tensor _permuted_output;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYEREX_H__ */
{
// Forward declarations
class ITensor;
+class ITensorInfo;
+
/** Basic function to run @ref NEOneHotKernel */
class NEOneHot : public INESimpleFunctionNoBorder
{
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
/** Prevent instances of this class from being copied (As this class contains pointers) */
NETransposeConvLayer &operator=(const NETransposeConvLayer &) = delete;
/** Allow instances of this class to be moved */
- NETransposeConvLayer(NETransposeConvLayer &&) = default;
+ NETransposeConvLayer(NETransposeConvLayer &&) = delete;
/** Allow instances of this class to be moved */
- NETransposeConvLayer &operator=(NETransposeConvLayer &&) = default;
+ NETransposeConvLayer &operator=(NETransposeConvLayer &&) = delete;
/** Default destructor */
virtual ~NETransposeConvLayer() = default;
PadStrideInfo _info;
bool _is_prepared;
};
-} // arm_compute
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ */
{"gather_ex_1d", "gather_ex.cl"},
{"gather_ex_1d_out", "gather_ex.cl"},
{"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"},
+ {"gemm_accumulate_biases", "gemm.cl"},
{"hashtable_lookup", "hashtable_lookup.cl"},
{"instance_normalization_ex", "instance_normalization_ex.cl"},
+ {"memset", "memset.cl"},
{"multiply_scale_factor", "multiply_scale_factor.cl"},
{"neg_tensor", "neg_tensor.cl"},
{"one_hot", "one_hot.cl"},
{"one_hot_only_on_value", "one_hot.cl"},
+ {"pad_layer_constant", "pad_layer.cl"},
+ {"pad_layer_symmetric_reflect", "pad_layer.cl"},
{"quantization_symm8", "quantization_symm8.cl"},
{"reduce_min_max", "reduce_operation.cl"},
{"reduce_sum_mean", "reduce_operation.cl"},
const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = {
#ifdef EMBEDDED_KERNELS
+ {
+ "activation_float_helpers.h",
+#include "./cl_kernels/activation_float_helpers.hembed"
+ },
{
"arg_min_max_ex.cl",
#include "./cl_kernels/arg_min_max_ex.clembed"
+ },
+ {
+ "binary_logical_op.cl",
+#include "./cl_kernels/binary_logical_op.clembed"
},
{
"cast.cl",
{
"gemmlowp_ex.cl",
#include "./cl_kernels/gemmlowp_ex.clembed"
+ },
+ {
+ "gemm_helpers.h",
+#include "./cl_kernels/gemm_helpers.hembed"
},
{
"hashtable_lookup.cl",
#include "./cl_kernels/instance_normalization_ex.clembed"
},
{
- "binary_logical_op.cl",
-#include "./cl_kernels/binary_logical_op.clembed"
+ "gemm.cl",
+#include "./cl_kernels/gemm.clembed"
+ },
+ {
+ "memset.cl",
+#include "./cl_kernels/memset.clembed"
},
{
"multiply_scale_factor.cl",
{
"one_hot.cl",
#include "./cl_kernels/one_hot.clembed"
+ },
+ {
+ "pad_layer.cl",
+#include "./cl_kernels/pad_layer.clembed"
},
{
"quantization_symm8.cl",
{
"reduce_operation.cl",
#include "./cl_kernels/reduce_operation.clembed"
+ },
+ {
+ "repeat.h",
+#include "./cl_kernels/repeat.hembed"
},
{
"scale_factor.cl",
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "helpers.h"
+
+#if GPU_ARCH == GPU_ARCH_BIFROST
+#define MLA(a, b, c) (fma(c, b, a))
+#else // GPU_ARCH == GPU_ARCH_BIFROST
+#define MLA(a, b, c) ((b) * (c) + (a))
+#endif // GPU_ARCH == GPU_ARCH_BIFROST
+
+// Hard-Swish
+#define hard_swish_op(DATA_TYPE, x, A_VAL, B_VAL) \
+ (x * ((min(max((x + (DATA_TYPE)3.0), (DATA_TYPE)0.0), (DATA_TYPE)6.0)) * (DATA_TYPE)0.166666667))
+
+// Logistic Activation
+#define logistic_op(DATA_TYPE, x, A_VAL, B_VAL) ((DATA_TYPE)1.0 / ((DATA_TYPE)1.0 + exp(-x)))
+
+// Hyperbolic Tangent Activation
+#define tanh_op(DATA_TYPE, x, A_VAL, B_VAL) ((DATA_TYPE)A_VAL * tanh((DATA_TYPE)B_VAL * x))
+
+// RELU Activation
+#define relu_op(DATA_TYPE, x, A_VAL, B_VAL) (max((DATA_TYPE)0.0, x))
+
+// Bounded RELU Activation
+#define brelu_op(DATA_TYPE, x, A_VAL, B_VAL) (min((DATA_TYPE)A_VAL, max((DATA_TYPE)0.0, x)))
+
+// Lower Upper Bounded RELU Activation
+#define lu_brelu_op(DATA_TYPE, x, A_VAL, B_VAL) (min(max(x, (DATA_TYPE)B_VAL), (DATA_TYPE)A_VAL))
+
+// Leaky RELU Activation
+#define lrelu_op(DATA_TYPE, x, A_VAL, B_VAL) \
+ ((min(x, (DATA_TYPE)0.0) * (DATA_TYPE)A_VAL) + max(x, (DATA_TYPE)0.0))
+
+// Soft RELU Activation
+#define srelu_op(DATA_TYPE, x, A_VAL, B_VAL) (log((DATA_TYPE)1.0 + exp(x)))
+
+// ELU Activation
+#define elu_op(DATA_TYPE, x, A_VAL, B_VAL) \
+ (select(((DATA_TYPE)A_VAL * (exp(x) - (DATA_TYPE)1.0)), x, isgreaterequal(x, (DATA_TYPE)0.0)))
+
+// Absolute Activation
+#define abs_op(DATA_TYPE, x, A_VAL, B_VAL) (fabs(x))
+
+// Square Activation
+#define square_op(DATA_TYPE, x, A_VAL, B_VAL) (x * x)
+
+// Square-root Activation
+#define sqrt_op(DATA_TYPE, x, A_VAL, B_VAL) (sqrt(x))
+
+// Linear Activation
+#define linear_op(DATA_TYPE, x, A_VAL, B_VAL) (MLA((DATA_TYPE)B_VAL, (DATA_TYPE)A_VAL, x))
+
+// Identity Activation
+#define identity_op(DATA_TYPE, x, A_VAL, B_VAL) (x)
+
+#define ACT_OP(op, DATA_TYPE, x, A_VAL, B_VAL) op##_op(DATA_TYPE, x, A_VAL, B_VAL)
+
+#define ACTIVATION(op, DATA_TYPE, x, A_VAL, B_VAL) ACT_OP(op, DATA_TYPE, x, A_VAL, B_VAL)
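+
+// Usage sketch (illustrative): with DATA_TYPE=float, ACTIVATION(relu, float, x, A_VAL, B_VAL)
+// expands via ACT_OP to relu_op(float, x, A_VAL, B_VAL), i.e. (max((float)0.0, x)). The extra
+// ACT_OP level lets the operator name arrive as a macro argument and still be token-pasted with
+// _op; A_VAL/B_VAL are ignored by relu_op but used by e.g. lu_brelu_op as the upper/lower bounds.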
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "gemm_helpers.h"
+#include "repeat.h"
+
+#if defined(M0) && defined(K0) && defined(V0) && defined(DATA_TYPE) && defined(SRC_WIDTH)
+#define INC2 (VEC_DATA_TYPE(uint, 2))(0, 1)
+#define INC3 (VEC_DATA_TYPE(uint, 3))(0, 1, 2)
+#define INC4 (VEC_DATA_TYPE(uint, 4))(0, 1, 2, 3)
+#define INC8 (VEC_DATA_TYPE(uint, 8))(0, 1, 2, 3, 4, 5, 6, 7)
+#define INC16 (VEC_DATA_TYPE(uint, 16))(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
+#define CONCAT_INC(K0) INC##K0
+#define INC(K0) CONCAT_INC(K0)
+
+#if (SRC_WIDTH % K0)
+#define BOUNDARY_CONDITION_X(x, a) \
+ ({ \
+ a = select( \
+ 0, a, \
+ CONVERT(((x * (VEC_DATA_TYPE(uint, K0))K0 + INC(K0)) < (VEC_DATA_TYPE(uint, K0))SRC_WIDTH), \
+ VEC_DATA_TYPE(DATA_TYPE, K0))); \
+ })
+#else // (SRC_WIDTH % K0)
+#define BOUNDARY_CONDITION_X(x, a) ({})
+#endif // (SRC_WIDTH % K0)
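+
+// Worked example (illustrative): with SRC_WIDTH=10 and K0=4, the block at x=2 covers columns
+// x * K0 + INC(4) = {8, 9, 10, 11}; comparing against SRC_WIDTH gives {true, true, false, false},
+// so select() zeroes the two out-of-bounds lanes while the in-bounds lanes keep the loaded values.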
+
+/** This OpenCL kernel reshapes the lhs input matrix. The kernel splits the input matrix in blocks
+ * of size M0xK0 and stores each one (not transposed) in the output matrix unrolling the values.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The width of the input tensor must be passed at compile time using -DSRC_WIDTH (e.g.
+ * -DSRC_WIDTH=16)
+ * @note The block's dimensions (M0 and K0) must be passed at compile time using -DM0 and -DK0 (e.g.
+ * -DM0=2, -DK0=2).
+ * @note The number of M0xK0 vertical blocks to store on the same output row must be passed at
+ * compile time using -DV0 (e.g. -DV0=2)
+ * @note Only the following values for M0, K0 and V0 are supported:
+ * M0: 2,3,4,5,6,7,8
+ * K0: 2,3,4,8,16
+ * V0: greater than 0
+ * @note In case the input has to be reinterpreted as a 3D tensor (e.g. input of convolution layer
+ * 1x1), the following information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# HEIGHT_GEMM3D: The height of the input in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the input in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ * @note If the M0xK0 blocks have to be interleaved, the option -DINTERLEAVE must be passed at
+ * compile time.
+ *
+ * @param[in] src_ptr Pointer to the source LHS tensor. Supported data
+ * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_stride_x Stride of the source LHS tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source LHS tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source LHS tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source LHS
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_INPUT_AS_3D)
+ */
+__kernel void gemm_reshape_lhs_matrix_nt(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst)
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+)
+{
+ // Block size
+#define BLOCK_SIZE ((M0) * (K0))
+
+ // Output offset X
+#if defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (K0)
+#else // defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (BLOCK_SIZE)
+#endif // defined(INTERLEAVE)
+
+ // Output step X
+#if defined(INTERLEAVE)
+#define OUTPUT_STEP_X (K0) * (V0)
+#else // Do not interleave
+#define OUTPUT_STEP_X (K0)
+#endif // defined(INTERLEAVE)
+
+ // Compute source and destination addresses
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+ // ------------------ Compute input/output addresses ---------------------------
+
+ // Compute the input address
+ __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes +
+ x * (uint)K0 * sizeof(DATA_TYPE) + y * (uint)M0 * src_stride_y;
+
+ // Compute the output address
+ __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)BLOCK_SIZE * (uint)V0 * sizeof(DATA_TYPE)) +
+ ((y / (uint)V0) * (uint)dst_stride_y) +
+ ((y % V0) * (uint)OUTPUT_OFFSET_X * sizeof(DATA_TYPE));
+
+ // Create variables: uint zin0=0, zin1=0, zin2=0...zin(M0-1)=0;
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zin, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src_stride_z by DEPTH_GEMM3D
+
+ input_ptr += z * (uint)src_stride_z * DEPTH_GEMM3D;
+
+  // The plane (zin) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zin, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, cross_plane_pad, src_stride_y);
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ input_ptr += z * (uint)src_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ output_ptr += z * (uint)dst_stride_z;
+
+ // ---------------------------Load input values --------------------------------
+ // Load values from the LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);
+ BOUNDARY_CONDITION_X(x, a0);
+#if M0 > 1
+ BOUNDARY_CONDITION_X(x, a1);
+#endif // M0 > 1
+#if M0 > 2
+ BOUNDARY_CONDITION_X(x, a2);
+#endif // M0 > 2
+#if M0 > 3
+ BOUNDARY_CONDITION_X(x, a3);
+#endif // M0 > 3
+#if M0 > 4
+ BOUNDARY_CONDITION_X(x, a4);
+#endif // M0 > 4
+#if M0 > 5
+ BOUNDARY_CONDITION_X(x, a5);
+#endif // M0 > 5
+#if M0 > 6
+ BOUNDARY_CONDITION_X(x, a6);
+#endif // M0 > 6
+#if M0 > 7
+ BOUNDARY_CONDITION_X(x, a7);
+#endif // M0 > 7
+ // ---------------------------Store output values ------------------------------
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zout, 0);
+ STORE_BLOCK(M0, K0, DATA_TYPE, a, output_ptr, OUTPUT_STEP_X * sizeof(DATA_TYPE), zout);
+
+#undef BLOCK_SIZE
+#undef OUTPUT_OFFSET_X
+#undef OUTPUT_STEP_X
+}
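+
+// Layout sketch (illustrative): with M0=2, K0=2, V0=1 and no INTERLEAVE, each 2x2 LHS block
+//   | a00 a01 |
+//   | a10 a11 |
+// is written to a single output row as a00 a01 a10 a11 (row stride OUTPUT_STEP_X = K0 elements),
+// so every reshaped block can later be read back with one linear pointer.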
+
+#if M0 == 2
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, M0) \
+ res = (VEC_DATA_TYPE(DATA_TYPE, M0))(a0.s##i, a1.s##i); \
+ VSTORE(M0) \
+ (res, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ })
+#elif M0 == 3 // M0 == 3
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, M0) \
+ res = (VEC_DATA_TYPE(DATA_TYPE, M0))(a0.s##i, a1.s##i, a2.s##i); \
+ VSTORE(M0) \
+ (res, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ })
+#elif M0 == 4 // M0 == 4
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, M0) \
+ res = (VEC_DATA_TYPE(DATA_TYPE, M0))(a0.s##i, a1.s##i, a2.s##i, a3.s##i); \
+ VSTORE(M0) \
+ (res, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ })
+#elif M0 == 5 // M0 == 5
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, 4) \
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, 4))(a0.s##i, a1.s##i, a2.s##i, a3.s##i); \
+ DATA_TYPE res1 = a4.s##i; \
+ VSTORE(4) \
+ (res0, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ *((__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE)) + 4) = res1; \
+ })
+#elif M0 == 6 // M0 == 6
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, 4) \
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, 4))(a0.s##i, a1.s##i, a2.s##i, a3.s##i); \
+ VEC_DATA_TYPE(DATA_TYPE, 2) \
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, 2))(a4.s##i, a5.s##i); \
+ VSTORE(4) \
+ (res0, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ VSTORE(2) \
+ (res1, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE)) + 4); \
+ })
+#elif M0 == 7 // M0 == 7
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, 4) \
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, 4))(a0.s##i, a1.s##i, a2.s##i, a3.s##i); \
+ VEC_DATA_TYPE(DATA_TYPE, 3) \
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, 3))(a4.s##i, a5.s##i, a6.s##i); \
+ VSTORE(4) \
+ (res0, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ VSTORE(3) \
+ (res1, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE)) + 4); \
+ })
+#elif M0 == 8 // M0 == 8
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, M0) \
+ res = (VEC_DATA_TYPE(DATA_TYPE, M0))(a0.s##i, a1.s##i, a2.s##i, a3.s##i, a4.s##i, a5.s##i, \
+ a6.s##i, a7.s##i); \
+ VSTORE(M0) \
+ (res, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ })
+#else // M0 not supported
+#error "M0 value not supported"
+#endif // M0 conditions
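+
+// Expansion sketch (illustrative, M0=4): TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 2)
+// gathers column 2 of the loaded block into (a0.s2, a1.s2, a2.s2, a3.s2) and VSTOREs it at
+// output_ptr + 0x2 * OUTPUT_STEP_X * sizeof(DATA_TYPE), i.e. one transposed K-column per call.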
+
+/** This OpenCL kernel reshapes the lhs input matrix. The kernel splits the input matrix in blocks
+ * of size M0xK0 and stores each one (transposed) in the output matrix unrolling the values.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The width of the input tensor must be passed at compile time using -DSRC_WIDTH (e.g.
+ * -DSRC_WIDTH=16)
+ * @note The block's dimensions (M0 and K0) must be passed at compile time using -DM0 and -DK0 (e.g.
+ * -DM0=2, -DK0=2).
+ * @note The number of M0xK0 vertical blocks to store on the same output row must be passed at
+ * compile time using -DV0 (e.g. -DV0=2)
+ * @note Only the following values for M0, K0 and V0 are supported:
+ * M0: 2,3,4,5,6,7,8
+ * K0: 2,3,4,8,16
+ * V0: greater than 0
+ * @note In case the input has to be reinterpreted as a 3D tensor (e.g. input of convolution layer
+ * 1x1), the following information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# HEIGHT_GEMM3D: The height of the input in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the input in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ * @note If the M0xK0 blocks have to be interleaved, the option -DINTERLEAVE must be passed at
+ * compile time.
+ *
+ * @param[in] src_ptr Pointer to the source LHS tensor. Supported data
+ * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_stride_x Stride of the source LHS tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source LHS tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source LHS tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source LHS
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_INPUT_AS_3D)
+ */
+__kernel void gemm_reshape_lhs_matrix_t(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst)
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+)
+{
+ // Block size
+#define BLOCK_SIZE ((M0) * (K0))
+
+ // Output offset X
+#if defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (M0)
+#else // defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (BLOCK_SIZE)
+#endif // defined(INTERLEAVE)
+
+ // Output step X
+#if defined(INTERLEAVE)
+#define OUTPUT_STEP_X (M0) * (V0)
+#else // Do not interleave
+#define OUTPUT_STEP_X (M0)
+#endif // defined(INTERLEAVE)
+
+ // Compute source and destination addresses
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+ // ------------------ Compute input/output addresses ---------------------------
+
+ // Compute the input address
+ __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes +
+ x * (uint)K0 * sizeof(DATA_TYPE) + y * (uint)M0 * src_stride_y;
+
+ // Compute the output address
+ __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)BLOCK_SIZE * (uint)V0 * sizeof(DATA_TYPE)) +
+ ((y / (uint)V0) * (uint)dst_stride_y) +
+ ((y % V0) * (uint)OUTPUT_OFFSET_X * sizeof(DATA_TYPE));
+
+ // Create variables: uint zin0=0, zin1=0, zin2=0...zin(M0-1)=0;
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zin, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src_stride_z by DEPTH_GEMM3D
+
+ input_ptr += z * (uint)src_stride_z * DEPTH_GEMM3D;
+
+  // The plane (zin) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zin, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, cross_plane_pad, src_stride_y);
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ input_ptr += z * (uint)src_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ output_ptr += z * (uint)dst_stride_z;
+
+ // ---------------------------Load input values --------------------------------
+
+ // Load values from the LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);
+ BOUNDARY_CONDITION_X(x, a0);
+#if M0 > 1
+ BOUNDARY_CONDITION_X(x, a1);
+#endif // M0 > 1
+#if M0 > 2
+ BOUNDARY_CONDITION_X(x, a2);
+#endif // M0 > 2
+#if M0 > 3
+ BOUNDARY_CONDITION_X(x, a3);
+#endif // M0 > 3
+#if M0 > 4
+ BOUNDARY_CONDITION_X(x, a4);
+#endif // M0 > 4
+#if M0 > 5
+ BOUNDARY_CONDITION_X(x, a5);
+#endif // M0 > 5
+#if M0 > 6
+ BOUNDARY_CONDITION_X(x, a6);
+#endif // M0 > 6
+#if M0 > 7
+ BOUNDARY_CONDITION_X(x, a7);
+#endif // M0 > 7
+ // ---------------------------Transpose and store block -----------------------
+
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 0);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 1);
+#if K0 > 2
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 2);
+#endif // K0 > 2
+#if K0 > 3
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 3);
+#endif // K0 > 3
+#if K0 > 4
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 4);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 5);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 6);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 7);
+#endif // K0 > 4
+#if K0 > 8
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 8);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 9);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, A);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, B);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, C);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, D);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, E);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, F);
+#endif // K0 > 8
+
+#undef BLOCK_SIZE
+#undef OUTPUT_OFFSET_X
+#undef OUTPUT_STEP_X
+}
+#endif // defined(M0) && defined(K0) && defined(V0) && defined(DATA_TYPE) && defined(SRC_WIDTH)
+
+#if defined(K0) && defined(N0) && defined(H0) && defined(DATA_TYPE) && defined(SRC_HEIGHT)
+/** This OpenCL kernel reshapes the rhs input matrix. The kernel splits the input matrix in blocks
+ * of size K0xN0 and stores each one (not transposed) in the output matrix unrolling the values.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The height of the input tensor must be passed at compile time using -DSRC_HEIGHT (e.g.
+ * -DSRC_HEIGHT=16)
+ * @note The block's dimensions (K0 and N0) must be passed at compile time using -DK0 and -DN0 (e.g.
+ * -DK0=2, -DN0=2).
+ * @note The number of K0xN0 vertical blocks to store on the same output row must be passed at
+ * compile time using -DH0 (e.g. -DH0=2)
+ * @note If the K0xN0 blocks have to be interleaved, the option -DINTERLEAVE must be passed at
+ * compile time.
+ * @note Only the following values for K0, N0 and H0 are supported:
+ * N0: 2,3,4,8,16
+ * K0: 1,2,3,4,8,16
+ * H0: greater than 0
+ *
+ * @param[in] src_ptr Pointer to the source RHS tensor. Supported data
+ * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_stride_x Stride of the source RHS tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source RHS tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source RHS tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source RHS
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
+__kernel void gemm_reshape_rhs_matrix_nt(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Block size
+#define BLOCK_SIZE ((K0) * (N0))
+
+ // Output offset X
+#if defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (N0)
+#else // defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (BLOCK_SIZE)
+#endif // defined(INTERLEAVE)
+
+ // Output step X
+#if defined(INTERLEAVE)
+#define OUTPUT_STEP_X (N0) * (H0)
+#else // Do not interleave
+#define OUTPUT_STEP_X (N0)
+#endif // defined(INTERLEAVE)
+
+ // Compute source and destination addresses
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+ // ------------------ Compute input/output addresses ---------------------------
+
+ // Compute the input address
+ __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes +
+ x * (uint)N0 * sizeof(DATA_TYPE) + y * (uint)K0 * src_stride_y +
+ z * (uint)src_stride_z;
+
+ // Compute the output address
+ __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes +
+ (y * (uint)BLOCK_SIZE * (uint)H0 * sizeof(DATA_TYPE)) +
+ ((x % (uint)H0) * (uint)OUTPUT_OFFSET_X * sizeof(DATA_TYPE)) +
+ ((x / (uint)H0) * (uint)dst_stride_y) + z * (uint)dst_stride_z;
+
+ // ---------------------------Load input values --------------------------------
+
+  REPEAT_VAR_INIT_TO_CONST(K0, VEC_DATA_TYPE(DATA_TYPE, N0), a,
+                           0); // VEC_DATA_TYPE(DATA_TYPE, N0) a0=0, a1=0, ... a(K0-1)=0;
+
+ // Load values from the RHS matrix
+ a0 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 0 * src_stride_y));
+#if K0 > 1
+ if (y * (uint)K0 + 1 < SRC_HEIGHT)
+ {
+ a1 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 1 * src_stride_y));
+ }
+#endif // K0 > 1
+#if K0 > 2
+ if (y * (uint)K0 + 2 < SRC_HEIGHT)
+ {
+ a2 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 2 * src_stride_y));
+ }
+#endif // K0 > 2
+#if K0 > 3
+ if (y * (uint)K0 + 3 < SRC_HEIGHT)
+ {
+ a3 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 3 * src_stride_y));
+ }
+#endif // K0 > 3
+#if K0 > 4
+ if (y * (uint)K0 + 4 < SRC_HEIGHT)
+ {
+ a4 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 4 * src_stride_y));
+ }
+ if (y * (uint)K0 + 5 < SRC_HEIGHT)
+ {
+ a5 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 5 * src_stride_y));
+ }
+ if (y * (uint)K0 + 6 < SRC_HEIGHT)
+ {
+ a6 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 6 * src_stride_y));
+ }
+ if (y * (uint)K0 + 7 < SRC_HEIGHT)
+ {
+ a7 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 7 * src_stride_y));
+ }
+#endif // K0 > 4
+#if K0 > 8
+ if (y * (uint)K0 + 8 < SRC_HEIGHT)
+ {
+ a8 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 8 * src_stride_y));
+ }
+ if (y * (uint)K0 + 9 < SRC_HEIGHT)
+ {
+ a9 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 9 * src_stride_y));
+ }
+ if (y * (uint)K0 + 10 < SRC_HEIGHT)
+ {
+ aA = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 10 * src_stride_y));
+ }
+ if (y * (uint)K0 + 11 < SRC_HEIGHT)
+ {
+ aB = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 11 * src_stride_y));
+ }
+ if (y * (uint)K0 + 12 < SRC_HEIGHT)
+ {
+ aC = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 12 * src_stride_y));
+ }
+ if (y * (uint)K0 + 13 < SRC_HEIGHT)
+ {
+ aD = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 13 * src_stride_y));
+ }
+ if (y * (uint)K0 + 14 < SRC_HEIGHT)
+ {
+ aE = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 14 * src_stride_y));
+ }
+ if (y * (uint)K0 + 15 < SRC_HEIGHT)
+ {
+ aF = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 15 * src_stride_y));
+ }
+#endif // K0 > 8
+
+ // ---------------------------Store output values ------------------------------
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zout, 0);
+ STORE_BLOCK(K0, N0, DATA_TYPE, a, output_ptr, OUTPUT_STEP_X * sizeof(DATA_TYPE), zout);
+
+#undef BLOCK_SIZE
+#undef OUTPUT_OFFSET_X
+#undef OUTPUT_STEP_X
+}
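+
+// Boundary sketch (illustrative): with SRC_HEIGHT=10 and K0=4, the block at y=2 covers rows
+// {8, 9, 10, 11}; rows 10 and 11 fail the SRC_HEIGHT checks above, so a2/a3 keep their zero
+// initialization and only the valid rows contribute to the reshaped output.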
+
+#if defined(TRANSPOSE)
+/** This OpenCL kernel reshapes the rhs input matrix. The kernel splits the input matrix in blocks
+ * of size K0xN0 and stores each one (transposed) in the output matrix unrolling the values.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The height of the input tensor must be passed at compile time using -DSRC_HEIGHT (e.g.
+ * -DSRC_HEIGHT=16)
+ * @note The block's dimensions (K0 and N0) must be passed at compile time using -DK0 and -DN0 (e.g.
+ * -DK0=2, -DN0=2).
+ * @note The number of K0xN0 vertical blocks to store on the same output row must be passed at
+ * compile time using -DH0 (e.g. -DH0=2)
+ * @note If the K0xN0 blocks have to be interleaved, the option -DINTERLEAVE must be passed at
+ * compile time.
+ * @note The option -DTRANSPOSE must be passed at compile time.
+ * @note Only the following values for K0, N0 and H0 are supported:
+ * N0: 2,3,4,8,16
+ * K0: 2,3,4,8,16
+ * H0: greater than 0
+ *
+ * @param[in] src_ptr Pointer to the source RHS tensor. Supported data
+ * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_stride_x Stride of the source RHS tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source RHS tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source RHS tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source RHS
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
+__kernel void gemm_reshape_rhs_matrix_t(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Block size
+#define BLOCK_SIZE ((K0) * (N0))
+
+ // Output offset X
+#if defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (K0)
+#else // defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (BLOCK_SIZE)
+#endif // defined(INTERLEAVE)
+
+ // Output step X
+#if defined(INTERLEAVE)
+#define OUTPUT_STEP_X (K0) * (H0)
+#else // Do not interleave
+#define OUTPUT_STEP_X (K0)
+#endif // defined(INTERLEAVE)
+
+ // Compute source and destination addresses
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+ // ------------------ Compute input/output addresses ---------------------------
+
+ // Compute the input address
+ __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes +
+ x * (uint)N0 * sizeof(DATA_TYPE) + y * (uint)K0 * src_stride_y +
+ z * (uint)src_stride_z;
+
+ // Compute the output address
+ __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes +
+ (y * (uint)BLOCK_SIZE * (uint)H0 * sizeof(DATA_TYPE)) +
+ ((x % H0) * (uint)OUTPUT_OFFSET_X * sizeof(DATA_TYPE)) +
+ ((x / (uint)H0) * (uint)dst_stride_y) + z * (uint)dst_stride_z;
+
+ // ---------------------------Load input values --------------------------------
+ REPEAT_VAR_INIT_TO_CONST(K0, VEC_DATA_TYPE(DATA_TYPE, N0), a,
+ 0); // VEC_DATA_TYPE(DATA_TYPE, N0) a0=0, a1=0, ... a(K0-1)=0;
+
+ // Load values from the RHS matrix
+ a0 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 0 * src_stride_y));
+ if (y * (uint)K0 + 1 < SRC_HEIGHT)
+ {
+ a1 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 1 * src_stride_y));
+ }
+#if K0 > 2
+ if (y * (uint)K0 + 2 < SRC_HEIGHT)
+ {
+ a2 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 2 * src_stride_y));
+ }
+#endif // K0 > 2
+#if K0 > 3
+ if (y * (uint)K0 + 3 < SRC_HEIGHT)
+ {
+ a3 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 3 * src_stride_y));
+ }
+#endif // K0 > 3
+#if K0 > 4
+ if (y * (uint)K0 + 4 < SRC_HEIGHT)
+ {
+ a4 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 4 * src_stride_y));
+ }
+ if (y * (uint)K0 + 5 < SRC_HEIGHT)
+ {
+ a5 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 5 * src_stride_y));
+ }
+ if (y * (uint)K0 + 6 < SRC_HEIGHT)
+ {
+ a6 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 6 * src_stride_y));
+ }
+ if (y * (uint)K0 + 7 < SRC_HEIGHT)
+ {
+ a7 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 7 * src_stride_y));
+ }
+#endif // K0 > 4
+#if K0 > 8
+ if (y * (uint)K0 + 8 < SRC_HEIGHT)
+ {
+ a8 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 8 * src_stride_y));
+ }
+ if (y * (uint)K0 + 9 < SRC_HEIGHT)
+ {
+ a9 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 9 * src_stride_y));
+ }
+ if (y * (uint)K0 + 10 < SRC_HEIGHT)
+ {
+ aA = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 10 * src_stride_y));
+ }
+ if (y * (uint)K0 + 11 < SRC_HEIGHT)
+ {
+ aB = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 11 * src_stride_y));
+ }
+ if (y * (uint)K0 + 12 < SRC_HEIGHT)
+ {
+ aC = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 12 * src_stride_y));
+ }
+ if (y * (uint)K0 + 13 < SRC_HEIGHT)
+ {
+ aD = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 13 * src_stride_y));
+ }
+ if (y * (uint)K0 + 14 < SRC_HEIGHT)
+ {
+ aE = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 14 * src_stride_y));
+ }
+ if (y * (uint)K0 + 15 < SRC_HEIGHT)
+ {
+ aF = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 15 * src_stride_y));
+ }
+#endif // K0 > 8
+
+ // ---------------------------Transpose the block ------------------------------
+ REPEAT_VAR_INIT_TO_CONST(
+ N0, VEC_DATA_TYPE(DATA_TYPE, K0), res,
+ 0); // VEC_DATA_TYPE(DATA_TYPE, K0) res0=0, res1=0, res2=0,... res(N0-1)=0;
+
+#if K0 == 2
+ // This part computes the following transpositions:
+ // 2x2 -> 2x2
+ // 2x4 -> 4x2
+ // 2x8 -> 8x2
+ // 2x16 -> 16x2
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF);
+#endif // N0 > 8
+
+#elif K0 == 3 // K0 == 3
+ // This part computes the following transpositions:
+ // 3x2 -> 2x3
+ // 3x4 -> 4x3
+ // 3x8 -> 8x3
+ // 3x16 -> 16x3
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0, a2.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1, a2.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2, a2.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3, a2.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4, a2.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5, a2.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6, a2.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7, a2.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8, a2.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9, a2.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA, a2.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB, a2.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC, a2.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD, a2.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE, a2.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF, a2.sF);
+#endif // N0 > 8
+
+#elif K0 == 4 // K0 == 4
+ // This part computes the following transpositions:
+ // 4x2 -> 2x4
+ // 4x4 -> 4x4
+ // 4x8 -> 8x4
+ // 4x16 -> 16x4
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0, a2.s0, a3.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1, a2.s1, a3.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2, a2.s2, a3.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3, a2.s3, a3.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4, a2.s4, a3.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5, a2.s5, a3.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6, a2.s6, a3.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7, a2.s7, a3.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8, a2.s8, a3.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9, a2.s9, a3.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA, a2.sA, a3.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB, a2.sB, a3.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC, a2.sC, a3.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD, a2.sD, a3.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE, a2.sE, a3.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF, a2.sF, a3.sF);
+#endif // N0 > 8
+
+#elif K0 == 8 // K0 == 8
+ // This part computes the following transpositions:
+ // 8x2 -> 2x8
+ // 8x4 -> 4x8
+ // 8x8 -> 8x8
+ // 8x16 -> 16x8
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0, a2.s0, a3.s0, a4.s0, a5.s0, a6.s0, a7.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1, a2.s1, a3.s1, a4.s1, a5.s1, a6.s1, a7.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2, a2.s2, a3.s2, a4.s2, a5.s2, a6.s2, a7.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3, a2.s3, a3.s3, a4.s3, a5.s3, a6.s3, a7.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4, a2.s4, a3.s4, a4.s4, a5.s4, a6.s4, a7.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5, a2.s5, a3.s5, a4.s5, a5.s5, a6.s5, a7.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6, a2.s6, a3.s6, a4.s6, a5.s6, a6.s6, a7.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7, a2.s7, a3.s7, a4.s7, a5.s7, a6.s7, a7.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8, a2.s8, a3.s8, a4.s8, a5.s8, a6.s8, a7.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9, a2.s9, a3.s9, a4.s9, a5.s9, a6.s9, a7.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA, a2.sA, a3.sA, a4.sA, a5.sA, a6.sA, a7.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB, a2.sB, a3.sB, a4.sB, a5.sB, a6.sB, a7.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC, a2.sC, a3.sC, a4.sC, a5.sC, a6.sC, a7.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD, a2.sD, a3.sD, a4.sD, a5.sD, a6.sD, a7.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE, a2.sE, a3.sE, a4.sE, a5.sE, a6.sE, a7.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF, a2.sF, a3.sF, a4.sF, a5.sF, a6.sF, a7.sF);
+#endif // N0 > 8
+
+#elif K0 == 16 // K0 == 16
+
+ // This part computes the following transpositions:
+ // 16x2 -> 2x16
+ // 16x4 -> 4x16
+ // 16x8 -> 8x16
+ // 16x16 -> 16x16
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0, a2.s0, a3.s0, a4.s0, a5.s0, a6.s0, a7.s0,
+ a8.s0, a9.s0, aA.s0, aB.s0, aC.s0, aD.s0, aE.s0, aF.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1, a2.s1, a3.s1, a4.s1, a5.s1, a6.s1, a7.s1,
+ a8.s1, a9.s1, aA.s1, aB.s1, aC.s1, aD.s1, aE.s1, aF.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2, a2.s2, a3.s2, a4.s2, a5.s2, a6.s2, a7.s2,
+ a8.s2, a9.s2, aA.s2, aB.s2, aC.s2, aD.s2, aE.s2, aF.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3, a2.s3, a3.s3, a4.s3, a5.s3, a6.s3, a7.s3,
+ a8.s3, a9.s3, aA.s3, aB.s3, aC.s3, aD.s3, aE.s3, aF.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4, a2.s4, a3.s4, a4.s4, a5.s4, a6.s4, a7.s4,
+ a8.s4, a9.s4, aA.s4, aB.s4, aC.s4, aD.s4, aE.s4, aF.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5, a2.s5, a3.s5, a4.s5, a5.s5, a6.s5, a7.s5,
+ a8.s5, a9.s5, aA.s5, aB.s5, aC.s5, aD.s5, aE.s5, aF.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6, a2.s6, a3.s6, a4.s6, a5.s6, a6.s6, a7.s6,
+ a8.s6, a9.s6, aA.s6, aB.s6, aC.s6, aD.s6, aE.s6, aF.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7, a2.s7, a3.s7, a4.s7, a5.s7, a6.s7, a7.s7,
+ a8.s7, a9.s7, aA.s7, aB.s7, aC.s7, aD.s7, aE.s7, aF.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8, a2.s8, a3.s8, a4.s8, a5.s8, a6.s8, a7.s8,
+ a8.s8, a9.s8, aA.s8, aB.s8, aC.s8, aD.s8, aE.s8, aF.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9, a2.s9, a3.s9, a4.s9, a5.s9, a6.s9, a7.s9,
+ a8.s9, a9.s9, aA.s9, aB.s9, aC.s9, aD.s9, aE.s9, aF.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA, a2.sA, a3.sA, a4.sA, a5.sA, a6.sA, a7.sA,
+ a8.sA, a9.sA, aA.sA, aB.sA, aC.sA, aD.sA, aE.sA, aF.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB, a2.sB, a3.sB, a4.sB, a5.sB, a6.sB, a7.sB,
+ a8.sB, a9.sB, aA.sB, aB.sB, aC.sB, aD.sB, aE.sB, aF.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC, a2.sC, a3.sC, a4.sC, a5.sC, a6.sC, a7.sC,
+ a8.sC, a9.sC, aA.sC, aB.sC, aC.sC, aD.sC, aE.sC, aF.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD, a2.sD, a3.sD, a4.sD, a5.sD, a6.sD, a7.sD,
+ a8.sD, a9.sD, aA.sD, aB.sD, aC.sD, aD.sD, aE.sD, aF.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE, a2.sE, a3.sE, a4.sE, a5.sE, a6.sE, a7.sE,
+ a8.sE, a9.sE, aA.sE, aB.sE, aC.sE, aD.sE, aE.sE, aF.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF, a2.sF, a3.sF, a4.sF, a5.sF, a6.sF, a7.sF,
+ a8.sF, a9.sF, aA.sF, aB.sF, aC.sF, aD.sF, aE.sF, aF.sF);
+#endif // N0 > 8
+
+#else // K0 not supported
+#error "K0 value not supported"
+#endif // K0 conditions
+
+ // ---------------------------Store the output values ------------------------------
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zout, 0);
+ STORE_BLOCK(N0, K0, DATA_TYPE, res, output_ptr, OUTPUT_STEP_X * sizeof(DATA_TYPE), zout);
+
+#undef BLOCK_SIZE
+#undef OUTPUT_OFFSET_X
+#undef OUTPUT_STEP_X
+}
+#endif // defined(TRANSPOSE)
+#endif // defined(K0) && defined(N0) && defined(H0) && defined(DATA_TYPE) && defined(SRC_HEIGHT)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(H0) && defined(DATA_TYPE) && \
+ defined(M) && defined(N) && defined(K)
+
+#define CONCAT(a, b) a##b
+
+#define ARM_DOT1(a, b, c) ({ c = fma(a, b, c); })
+#define ARM_DOT2(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ })
+#define ARM_DOT3(a, b, c) \
+ ({ \
+ ARM_DOT2(a, b, c); \
+ c = fma((a.s2), (b.s2), c); \
+ })
+#define ARM_DOT4(a, b, c) \
+ ({ \
+ ARM_DOT3(a, b, c); \
+ c = fma((a.s3), (b.s3), c); \
+ })
+#define ARM_DOT8(a, b, c) \
+ ({ \
+ ARM_DOT4((a.lo), (b.lo), c); \
+ ARM_DOT4((a.hi), (b.hi), c); \
+ })
+#define ARM_DOT16(a, b, c) \
+ ({ \
+ ARM_DOT8((a.lo), (b.lo), c); \
+ ARM_DOT8((a.hi), (b.hi), c); \
+ })
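+
+// Expansion sketch (illustrative, float4 operands): ARM_DOT4(a, b, c) unrolls to
+//   c = fma(a.s0, b.s0, c); c = fma(a.s1, b.s1, c);
+//   c = fma(a.s2, b.s2, c); c = fma(a.s3, b.s3, c);
+// i.e. it accumulates dot(a, b) into the scalar c; ARM_DOT8/ARM_DOT16 recurse on the .lo/.hi
+// halves of wider vectors.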
+
+#if N0 == 2
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ })
+#elif N0 == 3 // N0 == 3
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##2), (c.s2)); \
+ })
+#elif N0 == 4 // N0 == 4
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##2), (c.s2)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##3), (c.s3)); \
+ })
+#elif N0 == 8 // N0 == 8
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##2), (c.s2)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##3), (c.s3)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##4), (c.s4)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##5), (c.s5)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##6), (c.s6)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##7), (c.s7)); \
+ })
+#elif N0 == 16 // N0 == 16
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##2), (c.s2)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##3), (c.s3)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##4), (c.s4)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##5), (c.s5)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##6), (c.s6)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##7), (c.s7)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##8), (c.s8)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##9), (c.s9)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##A), (c.sA)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##B), (c.sB)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##C), (c.sC)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##D), (c.sD)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##E), (c.sE)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##F), (c.sF)); \
+ })
+#else // N0 not supported
+#error "N0 value not supported"
+#endif // N0 conditions
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped
+ * The RHS is reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the block K0xN0 is transposed
+ *
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions (M, N and K) must be passed at compile time using -DM, -DN and -DK
+ * (e.g. -DM=52, -DN=30 and -DK=90)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (e.g. -DK=64)
+ * @note The block's dimensions used for reshaping the RHS matrix (N0 and K0) must be passed at
+ * compile time using -DN0 and -DK0 (e.g. -DN0=8, -DK0=4).
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (e.g. -DM0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (e.g. -DH0=2)
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ *       -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - H0 >= 1
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should
+ * also be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @param[in] lhs_ptr Pointer to the LHS matrix. Supported data type:
+ * F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS matrix in X dimension (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS matrix in Y dimension (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS
+ * reshaped matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS matrix in Z dimension (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix
+ * in unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint lhs_stride_z,
+ uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS reshaped matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes +
+ (x % H0) * (uint)RHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zero, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+  // The plane (zlhs) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ int i = 0;
+ for (; i <= (K - K0); i += K0)
+ {
+ // Supported cases (M0, K0):
+ // 1,2 - 1,3 - 1,4 - 1,8 - 1,16
+ // 2,2 - 2,3 - 2,4 - 2,8 - 2,16
+ // 3,2 - 3,3 - 3,4 - 3,8 - 3,16
+ // 4,2 - 4,3 - 4,4 - 4,8 - 4,16
+ // 5,2 - 5,3 - 5,4 - 5,8 - 5,16
+ // 6,2 - 6,3 - 6,4 - 6,8 - 6,16
+ // 7,2 - 7,3 - 7,4 - 7,8 - 7,16
+ // 8,2 - 8,3 - 8,4 - 8,8 - 8,16
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS reshaped matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_ptr, rhs_offset, RHS_STEP_X * sizeof(DATA_TYPE), zero);
+
+ // Accumulate
+ ARM_DOT_K0XN0(K0, a0, b, c0);
+#if M0 > 1
+ ARM_DOT_K0XN0(K0, a1, b, c1);
+#endif // M0 > 1
+#if M0 > 2
+ ARM_DOT_K0XN0(K0, a2, b, c2);
+#endif // M0 > 2
+#if M0 > 3
+ ARM_DOT_K0XN0(K0, a3, b, c3);
+#endif // M0 > 3
+#if M0 > 4
+ ARM_DOT_K0XN0(K0, a4, b, c4);
+#endif // M0 > 4
+#if M0 > 5
+ ARM_DOT_K0XN0(K0, a5, b, c5);
+#endif // M0 > 5
+#if M0 > 6
+ ARM_DOT_K0XN0(K0, a6, b, c6);
+#endif // M0 > 6
+#if M0 > 7
+ ARM_DOT_K0XN0(K0, a7, b, c7);
+#endif // M0 > 7
+
+ lhs_offset += K0 * sizeof(DATA_TYPE);
+ rhs_offset += (N0 * RHS_STEP_X * RHS_STEP_LOOP) * sizeof(DATA_TYPE);
+ }
+
+ // Left-over accumulations
+ for (; i < K; ++i)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, 1, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS reshaped matrix
+ LOAD_BLOCK(N0, 1, DATA_TYPE, b, rhs_ptr, rhs_offset, RHS_STEP_X * sizeof(DATA_TYPE), zero);
+
+ // Accumulate
+ ARM_DOT_K0XN0(1, a0, b, c0);
+#if M0 > 1
+ ARM_DOT_K0XN0(1, a1, b, c1);
+#endif // M0 > 1
+#if M0 > 2
+ ARM_DOT_K0XN0(1, a2, b, c2);
+#endif // M0 > 2
+#if M0 > 3
+ ARM_DOT_K0XN0(1, a3, b, c3);
+#endif // M0 > 3
+#if M0 > 4
+ ARM_DOT_K0XN0(1, a4, b, c4);
+#endif // M0 > 4
+#if M0 > 5
+ ARM_DOT_K0XN0(1, a5, b, c5);
+#endif // M0 > 5
+#if M0 > 6
+ ARM_DOT_K0XN0(1, a6, b, c6);
+#endif // M0 > 6
+#if M0 > 7
+ ARM_DOT_K0XN0(1, a7, b, c7);
+#endif // M0 > 7
+
+ lhs_offset += sizeof(DATA_TYPE);
+ rhs_offset += sizeof(DATA_TYPE);
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+  // The plane (zout) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * bias_stride_y) +
+ get_global_id(2) * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(M0, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+
+#define VFMA(a, b, c) ({ c = fma(a, b, c); })
+
+#if M0 == 1
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ })
+#elif M0 == 2 // M0 == 2
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ })
+#elif M0 == 3 // M0 == 3
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ })
+#elif M0 == 4 // M0 == 4
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ })
+#elif M0 == 5 // M0 == 5
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ })
+#elif M0 == 6 // M0 == 6
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ })
+#elif M0 == 7 // M0 == 7
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##6).s##i), b, (c##6)); \
+ })
+#elif M0 == 8 // M0 == 8
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##6).s##i), b, (c##6)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##7).s##i), b, (c##7)); \
+ })
+#else // M0 not supported
+#error "M0 not supported"
+#endif // M0 not supported
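+
+// Illustrative expansion (a sketch, not generated by the preprocessor here): assuming
+// DATA_TYPE=float, M0=2 and N0=4, a call such as LD_RHS_VFMA_M0xN0(2, a, c) is roughly
+// equivalent to:
+//
+//   float4 b = vload4(0, (__global float *)(rhs_ptr + rhs_offset +
+//                                           0x2 * RHS_STEP_X * sizeof(float)));
+//   c0 = fma((float4)(a0.s2), b, c0); // broadcast lane 2 of LHS row 0 across the N0 columns
+//   c1 = fma((float4)(a1.s2), b, c1); // broadcast lane 2 of LHS row 1 across the N0 columns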
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped
+ * The RHS is reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the block K0xN0 is NOT transposed
+ *
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions (M, N and K) must be passed at compile time using -DM, -DN and -DK
+ * (e.g. -DM=52, -DN=30 and -DK=90).
+ * @note The block's dimensions used for reshaping the RHS matrix (N0 and K0) must be passed at
+ * compile time using -DN0 and -DK0 (e.g. -DN0=8, -DK0=4).
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (e.g. -DM0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (e.g. -DH0=2)
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - H0 >= 1
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should
+ * also be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @param[in] lhs_ptr Pointer to the LHS matrix. Supported data type:
+ * F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS matrix in X dimension (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS matrix in Y dimension (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS
+ * reshaped matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS matrix in Z dimension (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix
+ * in unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
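+// Example (illustrative only): with F32 data and the 52x30x90 GEMM mentioned in the notes above, a
+// plausible set of build options for this kernel would be
+//   -DDATA_TYPE=float -DM=52 -DN=30 -DK=90 -DM0=4 -DN0=4 -DK0=4 -DH0=2
+// optionally extended with -DRHS_INTERLEAVE, -DALPHA/-DBETA and -DACTIVATION_TYPE as described
+// above; the concrete values are normally chosen by the host code that configures this GEMM.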
+__kernel void gemm_mm_reshaped_only_rhs_nt(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint lhs_stride_z,
+ uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (N0)
+#define RHS_STEP_X ((N0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (N0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS reshaped matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes +
+ (x % H0) * (uint)RHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zin, 0); // uint zin0=0,zin1=0,zin2=0,... zin7=0;
+  REPEAT_VAR_INIT_TO_CONST(16, uint, zero, 0); // uint zero0=0,zero1=0,zero2=0,... zero15=0;
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+
+ // The plane (zin) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zin, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+  REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE, N0), c,
+                           0); // VEC_DATA_TYPE(DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ int i = 0;
+ for (; i <= (K - K0); i += K0)
+ {
+ // Supported cases (M0, K0):
+ // 1,2 - 1,3 - 1,4 - 1,8 - 1,16
+ // 2,2 - 2,3 - 2,4 - 2,8 - 2,16
+ // 3,2 - 3,3 - 3,4 - 3,8 - 3,16
+ // 4,2 - 4,3 - 4,4 - 4,8 - 4,16
+ // 5,2 - 5,3 - 5,4 - 5,8 - 5,16
+ // 6,2 - 6,3 - 6,4 - 6,8 - 6,16
+ // 7,2 - 7,3 - 7,4 - 7,8 - 7,16
+ // 8,2 - 8,3 - 8,4 - 8,8 - 8,16
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zin);
+
+ LD_RHS_VFMA_M0xN0(0, a, c);
+ LD_RHS_VFMA_M0xN0(1, a, c);
+#if K0 > 2
+ LD_RHS_VFMA_M0xN0(2, a, c);
+#endif // K0 > 2
+#if K0 > 3
+ LD_RHS_VFMA_M0xN0(3, a, c);
+#endif // K0 > 3
+#if K0 > 4
+ LD_RHS_VFMA_M0xN0(4, a, c);
+ LD_RHS_VFMA_M0xN0(5, a, c);
+ LD_RHS_VFMA_M0xN0(6, a, c);
+ LD_RHS_VFMA_M0xN0(7, a, c);
+#endif // K0 > 4
+#if K0 > 8
+ LD_RHS_VFMA_M0xN0(8, a, c);
+ LD_RHS_VFMA_M0xN0(9, a, c);
+ LD_RHS_VFMA_M0xN0(A, a, c);
+ LD_RHS_VFMA_M0xN0(B, a, c);
+ LD_RHS_VFMA_M0xN0(C, a, c);
+ LD_RHS_VFMA_M0xN0(D, a, c);
+ LD_RHS_VFMA_M0xN0(E, a, c);
+ LD_RHS_VFMA_M0xN0(F, a, c);
+#endif // K0 > 8
+
+ lhs_offset += K0 * sizeof(DATA_TYPE);
+ rhs_offset += K0 * RHS_STEP_X * RHS_STEP_LOOP * sizeof(DATA_TYPE);
+ }
+
+ // Left-over accumulations
+ for (; i < K; ++i)
+ {
+ // Load values from LHS matrix
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a0 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 0 * lhs_stride_y + zin0));
+#if M0 > 1
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a1 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 1 * lhs_stride_y + zin1));
+#endif // M0 > 1
+#if M0 > 2
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a2 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 2 * lhs_stride_y + zin2));
+#endif // M0 > 2
+#if M0 > 3
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a3 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 3 * lhs_stride_y + zin3));
+#endif // M0 > 3
+#if M0 > 4
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a4 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 4 * lhs_stride_y + zin4));
+#endif // M0 > 4
+#if M0 > 5
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a5 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 5 * lhs_stride_y + zin5));
+#endif // M0 > 5
+#if M0 > 6
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a6 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 6 * lhs_stride_y + zin6));
+#endif // M0 > 6
+#if M0 > 7
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a7 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 7 * lhs_stride_y + zin7));
+#endif // M0 > 7
+
+ LD_RHS_VFMA_M0xN0(0, a, c);
+
+ lhs_offset += sizeof(DATA_TYPE);
+ rhs_offset += RHS_STEP_X * sizeof(DATA_TYPE);
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * bias_stride_y) +
+ get_global_id(2) * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(M0, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(H0) && defined(DATA_TYPE) &&
+ // defined(M) && defined(N) && defined(K)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && \
+ defined(DATA_TYPE) && defined(DATA_TYPE_ACCUMULATOR) && defined(M) && defined(N)
+
+#if defined(MIXED_PRECISION)
+#if K0 == 2
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ })
+#elif K0 == 3 // K0 == 3
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ c += a.s2 * b.s2; \
+ })
+#elif K0 == 4 // K0 == 4
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ c += a.s2 * b.s2; \
+ c += a.s3 * b.s3; \
+ })
+#elif K0 == 8 // K0 == 8
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ c += a.s2 * b.s2; \
+ c += a.s3 * b.s3; \
+ c += a.s4 * b.s4; \
+ c += a.s5 * b.s5; \
+ c += a.s6 * b.s6; \
+ c += a.s7 * b.s7; \
+ })
+#elif K0 == 16 // K0 == 16
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ c += a.s2 * b.s2; \
+ c += a.s3 * b.s3; \
+ c += a.s4 * b.s4; \
+ c += a.s5 * b.s5; \
+ c += a.s6 * b.s6; \
+ c += a.s7 * b.s7; \
+ c += a.s8 * b.s8; \
+ c += a.s9 * b.s9; \
+ c += a.sA * b.sA; \
+ c += a.sB * b.sB; \
+ c += a.sC * b.sC; \
+ c += a.sD * b.sD; \
+ c += a.sE * b.sE; \
+ c += a.sF * b.sF; \
+ })
+#else // K0 not supported
+#error "K0 value not supported"
+#endif // K0 conditions
+#else // defined(MIXED_PRECISION)
+#if K0 == 2
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ })
+#elif K0 == 3 // K0 == 3
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ c = fma(a.s2, b.s2, c); \
+ })
+#elif K0 == 4 // K0 == 4
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ c = fma(a.s2, b.s2, c); \
+ c = fma(a.s3, b.s3, c); \
+ })
+#elif K0 == 8 // K0 == 8
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ c = fma(a.s2, b.s2, c); \
+ c = fma(a.s3, b.s3, c); \
+ c = fma(a.s4, b.s4, c); \
+ c = fma(a.s5, b.s5, c); \
+ c = fma(a.s6, b.s6, c); \
+ c = fma(a.s7, b.s7, c); \
+ })
+#elif K0 == 16 // K0 == 16
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ c = fma(a.s2, b.s2, c); \
+ c = fma(a.s3, b.s3, c); \
+ c = fma(a.s4, b.s4, c); \
+ c = fma(a.s5, b.s5, c); \
+ c = fma(a.s6, b.s6, c); \
+ c = fma(a.s7, b.s7, c); \
+ c = fma(a.s8, b.s8, c); \
+ c = fma(a.s9, b.s9, c); \
+ c = fma(a.sA, b.sA, c); \
+ c = fma(a.sB, b.sB, c); \
+ c = fma(a.sC, b.sC, c); \
+ c = fma(a.sD, b.sD, c); \
+ c = fma(a.sE, b.sE, c); \
+ c = fma(a.sF, b.sF, c); \
+ })
+#else // K0 not supported
+#error "K0 value not supported"
+#endif // K0 conditions
+#endif // defined(MIXED_PRECISION)
+
+#if N0 == 2
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ })
+#elif N0 == 3 // N0 == 3
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ ARM_DOT_K0((a), (b##2), (c.s2)); \
+ })
+#elif N0 == 4 // N0 == 4
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ ARM_DOT_K0((a), (b##2), (c.s2)); \
+ ARM_DOT_K0((a), (b##3), (c.s3)); \
+ })
+#elif N0 == 8 // N0 == 8
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ ARM_DOT_K0((a), (b##2), (c.s2)); \
+ ARM_DOT_K0((a), (b##3), (c.s3)); \
+ ARM_DOT_K0((a), (b##4), (c.s4)); \
+ ARM_DOT_K0((a), (b##5), (c.s5)); \
+ ARM_DOT_K0((a), (b##6), (c.s6)); \
+ ARM_DOT_K0((a), (b##7), (c.s7)); \
+ })
+#elif N0 == 16 // N0 == 16
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ ARM_DOT_K0((a), (b##2), (c.s2)); \
+ ARM_DOT_K0((a), (b##3), (c.s3)); \
+ ARM_DOT_K0((a), (b##4), (c.s4)); \
+ ARM_DOT_K0((a), (b##5), (c.s5)); \
+ ARM_DOT_K0((a), (b##6), (c.s6)); \
+ ARM_DOT_K0((a), (b##7), (c.s7)); \
+ ARM_DOT_K0((a), (b##8), (c.s8)); \
+ ARM_DOT_K0((a), (b##9), (c.s9)); \
+ ARM_DOT_K0((a), (b##A), (c.sA)); \
+ ARM_DOT_K0((a), (b##B), (c.sB)); \
+ ARM_DOT_K0((a), (b##C), (c.sC)); \
+ ARM_DOT_K0((a), (b##D), (c.sD)); \
+ ARM_DOT_K0((a), (b##E), (c.sE)); \
+ ARM_DOT_K0((a), (b##F), (c.sF)); \
+ })
+#else // N0 not supported
+#error "N0 value not supported"
+#endif // N0 conditions
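+
+// Illustrative expansion (a sketch, not generated by the preprocessor here): assuming K0=2, N0=2
+// and no MIXED_PRECISION, ARM_DOT_K0XN0(a0, b, c0) accumulates two 2-element dot products:
+//
+//   c0.s0 = fma(a0.s0, b0.s0, c0.s0); c0.s0 = fma(a0.s1, b0.s1, c0.s0);
+//   c0.s1 = fma(a0.s0, b1.s0, c0.s1); c0.s1 = fma(a0.s1, b1.s1, c0.s1);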
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix must be reshaped with @ref CLGEMMReshapeLHSMatrixKernel and the M0xK0 block must
+ * NOT be transposed. The RHS matrix must be reshaped with @ref CLGEMMReshapeRHSMatrixKernel and
+ * the K0xN0 block must be transposed.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The data type used for the accumulators must be passed at compile time using
+ * -DDATA_TYPE_ACCUMULATOR (e.g. -DDATA_TYPE_ACCUMULATOR=float)
+ * @note The F16 computation also supports mixed precision through the option -DMIXED_PRECISION
+ * passed at compile time. If enabled, DATA_TYPE_ACCUMULATOR should be set to float
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions M and N must be passed at compile time using -DM and -DN (e.g. -DM=52
+ * and -DN=90).
+ * @note The block's dimensions used for reshaping the LHS matrix and the RHS matrix (M0, N0 and K0)
+ * must be passed at compile time using -DM0, -DN0 and -DK0 (e.g. -DM0=4, -DN0=8, -DK0=4).
+ * @note The number of M0xK0 vertical blocks stored on the same output row of the reshaped LHS
+ * matrix must be passed at compile time using -DV0 (e.g. -DV0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (e.g. -DH0=2)
+ * @note If the M0xK0 blocks in the reshaped LHS matrix have been interleaved, the option
+ * -DLHS_INTERLEAVE must be passed at compile time.
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - V0 >= 1
+ * - H0 >= 1
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should
+ * also be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix NOT reshaped
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data
+ * type: F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS
+ * reshaped matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS
+ * reshaped matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] k Number of columns in LHS matrix and rows in RHS
+ * matrix not reshaped.
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
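+// Example (illustrative only): for an F16 GEMM with float accumulators, this kernel could be built
+// with something like
+//   -DDATA_TYPE=half -DDATA_TYPE_ACCUMULATOR=float -DMIXED_PRECISION -DM=52 -DN=90
+//   -DM0=4 -DN0=8 -DK0=4 -DV0=2 -DH0=2
+// where the block and interleave factors must match the ones used when reshaping LHS and RHS.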
+__kernel void gemm_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint k, uint lhs_stride_z,
+ uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define LHS_BLOCK_SIZE ((K0) * (M0))
+
+#if defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (K0)
+#define LHS_STEP_X ((K0) * (V0))
+#define LHS_STEP_LOOP (1)
+#else // defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (LHS_BLOCK_SIZE)
+#define LHS_STEP_X (K0)
+#define LHS_STEP_LOOP (V0)
+#endif // defined(LHS_INTERLEAVE)
+
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((get_global_id(0) * N0 >= N) || (get_global_id(1) * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ __global uchar *lhs_addr = lhs_ptr + lhs_offset_first_element_in_bytes +
+ (get_global_id(1) % V0) * (uint)LHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (get_global_id(1) / V0) * (uint)lhs_stride_y +
+ (get_global_id(2) * lhs_stride_z);
+
+ // Compute RHS matrix address
+ __global uchar *rhs_addr = rhs_ptr + rhs_offset_first_element_in_bytes +
+ (get_global_id(0) % H0) * (uint)RHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (get_global_id(0) / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_addr += (get_global_id(2) % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_addr += get_global_id(2) * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0), c, 0);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zero, 0);
+
+ for (int i = 0; i < k; i += K0)
+ {
+ // Supported cases (M0, K0):
+ // 1,2 - 1,3 - 1,4 - 1,8 - 1,16
+ // 2,2 - 2,3 - 2,4 - 2,8 - 2,16
+ // 3,2 - 3,3 - 3,4 - 3,8 - 3,16
+ // 4,2 - 4,3 - 4,4 - 4,8 - 4,16
+ // 5,2 - 5,3 - 5,4 - 5,8 - 5,16
+ // 6,2 - 6,3 - 6,4 - 6,8 - 6,16
+ // 7,2 - 7,3 - 7,4 - 7,8 - 7,16
+ // 8,2 - 8,3 - 8,4 - 8,8 - 8,16
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_addr, 0, LHS_STEP_X * sizeof(DATA_TYPE), zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_addr, 0, RHS_STEP_X * sizeof(DATA_TYPE), zero);
+
+ // Accumulate
+ ARM_DOT_K0XN0(a0, b, c0);
+#if M0 > 1
+ ARM_DOT_K0XN0(a1, b, c1);
+#endif // M0 > 1
+#if M0 > 2
+ ARM_DOT_K0XN0(a2, b, c2);
+#endif // M0 > 2
+#if M0 > 3
+ ARM_DOT_K0XN0(a3, b, c3);
+#endif // M0 > 3
+#if M0 > 4
+ ARM_DOT_K0XN0(a4, b, c4);
+#endif // M0 > 4
+#if M0 > 5
+ ARM_DOT_K0XN0(a5, b, c5);
+#endif // M0 > 5
+#if M0 > 6
+ ARM_DOT_K0XN0(a6, b, c6);
+#endif // M0 > 6
+#if M0 > 7
+ ARM_DOT_K0XN0(a7, b, c7);
+#endif // M0 > 7
+
+ lhs_addr += (M0 * LHS_STEP_X * LHS_STEP_LOOP) * sizeof(DATA_TYPE);
+ rhs_addr += (N0 * RHS_STEP_X * RHS_STEP_LOOP) * sizeof(DATA_TYPE);
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0);
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+  // The plane (zout) is calculated dividing M (get_global_id(1) * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, get_global_id(1), HEIGHT_GEMM3D, DEPTH_GEMM3D,
+ dst_cross_plane_pad, dst_stride_y);
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += get_global_id(2) * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += get_global_id(2) * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+#if defined(MIXED_PRECISION)
+ CONVERT_BLOCK(1, N0, DATA_TYPE_ACCUMULATOR, bias, bias_hp);
+ ADD_BLOCK_BROADCAST(M0, c, bias_hp0);
+#else // defined(MIXED_PRECISION)
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+#endif // defined(MIXED_PRECISION)
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * bias_stride_y) +
+ get_global_id(2) * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+#if defined(MIXED_PRECISION)
+ CONVERT_BLOCK(M0, N0, DATA_TYPE_ACCUMULATOR, bias, bias_hp);
+ ADD_BLOCK(M0, c, bias_hp);
+#else // defined(MIXED_PRECISION)
+ ADD_BLOCK(M0, c, bias);
+#endif // defined(MIXED_PRECISION)
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+#if defined(MIXED_PRECISION)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE_ACCUMULATOR, c, A_VAL, B_VAL);
+#else // defined(MIXED_PRECISION)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(MIXED_PRECISION)
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+#if defined(MIXED_PRECISION)
+ CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+#else // defined(MIXED_PRECISION)
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+#endif // defined(MIXED_PRECISION)
+
+#undef LHS_BLOCK_SIZE
+#undef LHS_OFFSET_X
+#undef LHS_STEP_X
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+
+#if defined(LHS_TRANSPOSE)
+
+#define VTYPE(TYPE, SIZE) VEC_DATA_TYPE(TYPE, SIZE)
+
+#if defined(MIXED_PRECISION)
+
+#if (GPU_ARCH == GPU_ARCH_MIDGARD)
+#define ARM_VFMA(N0, a, b, c) \
+ c += (CONVERT(a, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0))) * \
+ (CONVERT(b, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0)));
+#else // GPU_ARCH == GPU_ARCH_MIDGARD
+#define ARM_VFMA(N0, a, b, c) \
+ c = fma((CONVERT(a, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0))), \
+ (CONVERT(b, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0))), (c));
+#endif // GPU_ARCH == GPU_ARCH_MIDGARD
+
+#else // defined(MIXED_PRECISION)
+
+#if (GPU_ARCH == GPU_ARCH_MIDGARD)
+#define ARM_VFMA(N0, a, b, c) c += (a) * (b);
+#else // GPU_ARCH == GPU_ARCH_MIDGARD
+#define ARM_VFMA(N0, a, b, c) c = fma((a), (b), (c));
+#endif // GPU_ARCH == GPU_ARCH_MIDGARD
+
+#endif // defined(MIXED_PRECISION)
+
+#define ARM_VVM_T_NT_1xN0x1(N0, TYPE, a, b, C) ({ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a), b, (C##0)); })
+#define ARM_VVM_T_NT_2xN0x1(N0, TYPE, a, b, C) \
+ ({ \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s0), b, (C##0)); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s1), b, (C##1)); \
+ })
+#define ARM_VVM_T_NT_3xN0x1(N0, TYPE, a, b, C) \
+ ({ \
+ ARM_VVM_T_NT_2xN0x1(N0, TYPE, a, b, C); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s2), b, (C##2)); \
+ })
+#define ARM_VVM_T_NT_4xN0x1(N0, TYPE, a, b, C) \
+ ({ \
+ ARM_VVM_T_NT_3xN0x1(N0, TYPE, a, b, C); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s3), b, (C##3)); \
+ })
+#define ARM_VVM_T_NT_8xN0x1(N0, TYPE, a, b, C) \
+ ({ \
+ ARM_VVM_T_NT_4xN0x1(N0, TYPE, a, b, C); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s4), b, (C##4)); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s5), b, (C##5)); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s6), b, (C##6)); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s7), b, (C##7)); \
+ })
+
+// Factory macro for the column-vector (transposed) by row-vector (not transposed) multiplication.
+// K0 = 1. a is the column-vector (transposed), b is the row-vector (not transposed) and C is the
+// output matrix. Lower case denotes a vector (a, b); upper case denotes a matrix (C).
+#define ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, a, b, C) ARM_VVM_T_NT_##M0##xN0x1(N0, TYPE, a, b, C)
+
+#define ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, A, B, C) \
+ ({ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##0), (B##0), C); })
+#define ARM_MM_T_NT_M0xN0x2(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, A, B, C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##1), (B##1), C); \
+ })
+#define ARM_MM_T_NT_M0xN0x3(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x2(M0, N0, TYPE, A, B, C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##2), (B##2), C); \
+ })
+#define ARM_MM_T_NT_M0xN0x4(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x3(M0, N0, TYPE, A, B, C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##3), (B##3), C); \
+ })
+#define ARM_MM_T_NT_M0xN0x8(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x4(M0, N0, TYPE, A, B, C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##4), (B##4), C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##5), (B##5), C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##6), (B##6), C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##7), (B##7), C); \
+ })
+#define ARM_MM_T_NT_M0xN0x16(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x8(M0, N0, TYPE, A, B, C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##8), (B##8), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##9), (B##9), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##A), (B##A), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##B), (B##B), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##C), (B##C), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##D), (B##D), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##E), (B##E), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##F), (B##F), C); \
+ })
+
+// Factory macro for the matrix (transposed) by matrix (not transposed) multiplication.
+// The dimensions for this matrix multiplication are defined through M0, N0 and K0
+// The dimensions supported are:
+// M0: 1, 2, 3, 4, 8
+// N0: 1, 2, 3, 4, 8, 16
+// K0: 1, 2, 3, 4, 8, 16
+// This macro calls the column-vector by row-vector macro (ARM_VVM_T_NT) K0 times
+// A, B and C are matrices
+#define ARM_MM_T_NT(M0, N0, K0, TYPE, A, B, C) \
+ CONCAT(ARM_MM_T_NT_M0xN0x, K0) \
+ (M0, N0, TYPE, A, B, C)
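+
+// Illustrative expansion (a sketch, assuming DATA_TYPE=float, N0=4, no MIXED_PRECISION and a
+// non-Midgard GPU_ARCH): ARM_MM_T_NT(2, 4, 1, float, a, b, c) multiplies one transposed LHS column
+// by one RHS row and is roughly equivalent to
+//
+//   c0 = fma((float4)(a0.s0), b0, c0);
+//   c1 = fma((float4)(a0.s1), b0, c1);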
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix must be reshaped with @ref CLGEMMReshapeLHSMatrixKernel and the M0xK0 block must
+ * be transposed. The RHS matrix must be reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the
+ * K0xN0 block must NOT be transposed.
+ *
+ * @note LHS_TRANSPOSE should be passed at compile time in order to compile this OpenCL kernel (e.g.
+ * -DLHS_TRANSPOSE).
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions M and N must be passed at compile time using -DM and -DN (e.g. -DM=52
+ * and -DN=90).
+ * @note The block's dimensions used for reshaping the LHS matrix and the RHS matrix (M0, N0 and K0)
+ * must be passed at compile time using -DM0, -DN0 and -DK0 (e.g. -DM0=4, -DN0=8, -DK0=4).
+ * @note The number of M0xK0 vertical blocks stored on the same output row of the reshaped LHS
+ * matrix must be passed at compile time using -DV0 (e.g. -DV0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (e.g. -DH0=2)
+ * @note If the M0xK0 blocks in the reshaped LHS matrix have been interleaved, the option
+ * -DLHS_INTERLEAVE must be passed at compile time.
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 2, 3, 4, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - V0 >= 1
+ * - H0 >= 1
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should
+ * also be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix NOT reshaped
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data
+ * type: F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS
+ * reshaped matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS
+ * reshaped matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] k Number of columns in LHS matrix and rows in RHS
+ * matrix not reshaped.
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
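+// Worked example (illustrative only): with M0=4, K0=4 and V0=2, each reshaped LHS block stores
+// M0 x K0 = 16 elements. If -DLHS_INTERLEAVE is defined, LHS_OFFSET_X = 4 and LHS_STEP_X = 8, so
+// successive K iterations of one block are interleaved with those of the neighbouring block;
+// otherwise LHS_OFFSET_X = 16 and LHS_STEP_X = 4, i.e. each block is stored contiguously.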
+__kernel void gemm_mm_reshaped_lhs_t_rhs_nt(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint k, uint lhs_stride_z,
+ uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define LHS_BLOCK_SIZE ((K0) * (M0))
+
+#if defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (M0)
+#define LHS_STEP_X ((M0) * (V0))
+#define LHS_STEP_LOOP (1)
+#else // defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (LHS_BLOCK_SIZE)
+#define LHS_STEP_X (M0)
+#define LHS_STEP_LOOP (V0)
+#endif // defined(LHS_INTERLEAVE)
+
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (N0)
+#define RHS_STEP_X ((N0) * (H0))
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (N0)
+#endif // defined(RHS_INTERLEAVE)
+
+ const uint x = get_global_id(0);
+ const uint y = get_global_id(1);
+ const uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ __global uchar *lhs_addr = lhs_ptr + lhs_offset_first_element_in_bytes +
+ (y % V0) * (uint)LHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (y / V0) * (uint)lhs_stride_y + (z * lhs_stride_z);
+
+ // Compute RHS matrix address
+ __global uchar *rhs_addr = rhs_ptr + rhs_offset_first_element_in_bytes +
+ (x % H0) * (uint)RHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_addr += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_addr += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0), c, 0);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zero, 0);
+
+ __global DATA_TYPE *lhs = (__global DATA_TYPE *)(lhs_addr);
+ __global DATA_TYPE *rhs = (__global DATA_TYPE *)(rhs_addr);
+
+ for (int i = 0; i < k; i += K0)
+ {
+ VEC_DATA_TYPE(DATA_TYPE, M0)
+ a0 = VLOAD(M0)(0, lhs);
+ VEC_DATA_TYPE(DATA_TYPE, N0)
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+#if K0 > 1
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 1
+
+#if K0 > 2
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 2
+
+#if K0 > 3
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 3
+
+#if K0 > 4
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 4
+
+#if K0 > 8
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 8
+
+#ifndef LHS_INTERLEAVE
+ lhs += (M0 * K0 * (V0 - 1));
+#endif // LHS_INTERLEAVE
+
+#ifndef RHS_INTERLEAVE
+ rhs += (N0 * K0 * (H0 - 1));
+#endif // RHS_INTERLEAVE
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0);
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+  // The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr =
+ bias_ptr + bias_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+#if defined(MIXED_PRECISION)
+ CONVERT_BLOCK(1, N0, DATA_TYPE_ACCUMULATOR, bias, bias_hp);
+ ADD_BLOCK_BROADCAST(M0, c, bias_hp0);
+#else // defined(MIXED_PRECISION)
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+#endif // defined(MIXED_PRECISION)
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * bias_stride_y) +
+ z * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+#if defined(MIXED_PRECISION)
+ CONVERT_BLOCK(M0, N0, DATA_TYPE_ACCUMULATOR, bias, bias_hp);
+ ADD_BLOCK(M0, c, bias_hp);
+#else // defined(MIXED_PRECISION)
+ ADD_BLOCK(M0, c, bias);
+#endif // defined(MIXED_PRECISION)
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+#if defined(MIXED_PRECISION)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE_ACCUMULATOR, c, A_VAL, B_VAL);
+#else // defined(MIXED_PRECISION)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(MIXED_PRECISION)
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+#if defined(MIXED_PRECISION)
+ CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+#else // defined(MIXED_PRECISION)
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+#endif // defined(MIXED_PRECISION)
+
+#undef LHS_BLOCK_SIZE
+#undef LHS_OFFSET_X
+#undef LHS_STEP_X
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+
+#endif // defined(LHS_TRANSPOSE)
+
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) &&
+       // defined(DATA_TYPE) && defined(DATA_TYPE_ACCUMULATOR) && defined(M) && defined(N)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(K) && defined(DATA_TYPE)
+
+#define VFMA(a, b, c) ({ c = fma(a, b, c); })
+
+#if M0 == 1
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); })
+#elif M0 == 2 // M0 == 2
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ })
+#elif M0 == 3 // M0 == 3
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ })
+#elif M0 == 4 // M0 == 4
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ })
+#elif M0 == 5 // M0 == 5
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ })
+#elif M0 == 6 // M0 == 6
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ })
+#elif M0 == 7 // M0 == 7
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##6).s##i), b, (c##6)); \
+ })
+#elif M0 == 8 // M0 == 8
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##6).s##i), b, (c##6)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##7).s##i), b, (c##7)); \
+ })
+#else // M0 not supported
+#error "M0 not supported"
+#endif // M0 not supported
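+
+// Unlike LD_RHS_VFMA_M0xN0 above, RHS_VFMA_M0xN0 takes the already loaded RHS vector b as an
+// argument. Illustrative expansion (a sketch, assuming DATA_TYPE=float, M0=2, N0=4):
+// RHS_VFMA_M0xN0(3, a, b0, c) is roughly equivalent to
+//
+//   c0 = fma((float4)(a0.s3), b0, c0);
+//   c1 = fma((float4)(a1.s3), b0, c1);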
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped
+ * The RHS matrix is NOT reshaped
+ *
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions (M, N and K) must be passed at compile time using -DM, -DN and -DK
+ * (e.g. -DM=52, -DN=30 and -DK=90)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (e.g. -DK=64)
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (e.g. -DM0=2)
+ * @note The number of K0 partial accumulations must be passed at compile time using -DK0 (e.g.,
+ * -DK0=2)
+ * @note The number of N0 columns to process must be passed at compile time using -DN0 (e.g. -DN0=2)
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should
+ * also be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @param[in] lhs_ptr Pointer to the LHS matrix. Supported data type:
+ * F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS matrix in X dimension (in bytes)
+ * @param[in] lhs_step_x lhs_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS matrix in Y dimension (in bytes)
+ * @param[in] lhs_step_y lhs_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS matrix
+ * @param[in] rhs_ptr Pointer to the RHS matrix. Supported data type:
+ * same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS matrix in X dimension (in bytes)
+ * @param[in] rhs_step_x rhs_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS matrix in Y dimension (in bytes)
+ * @param[in] rhs_step_y rhs_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                           Pointer to the destination matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS matrix in Z dimension (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS matrix in Z dimension (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix
+ * in unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_native(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint lhs_stride_z, uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
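+  // Work items created only by the rounded-up "dummy" dispatch have a tile origin at or beyond
+  // N or M and return early without touching memory.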
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes + x * N0 * sizeof(DATA_TYPE);
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zlhs, 0);
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zero, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // The plane (zlhs) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ int i = 0;
+ for (; i <= (K - K0); i += K0)
+ {
+ // Supported cases (M0, K0):
+ // 1,2 - 1,3 - 1,4 - 1,8 - 1,16
+ // 2,2 - 2,3 - 2,4 - 2,8 - 2,16
+ // 3,2 - 3,3 - 3,4 - 3,8 - 3,16
+ // 4,2 - 4,3 - 4,4 - 4,8 - 4,16
+ // 5,2 - 5,3 - 5,4 - 5,8 - 5,16
+ // 6,2 - 6,3 - 6,4 - 6,8 - 6,16
+ // 7,2 - 7,3 - 7,4 - 7,8 - 7,16
+ // 8,2 - 8,3 - 8,4 - 8,8 - 8,16
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(K0, N0, DATA_TYPE, b, rhs_ptr, rhs_offset, rhs_stride_y, zero);
+
+ RHS_VFMA_M0xN0(0, a, b0, c);
+ RHS_VFMA_M0xN0(1, a, b1, c);
+#if K0 > 2
+ RHS_VFMA_M0xN0(2, a, b2, c);
+#endif // K0 > 2
+#if K0 > 3
+ RHS_VFMA_M0xN0(3, a, b3, c);
+#endif // K0 > 3
+#if K0 > 4
+ RHS_VFMA_M0xN0(4, a, b4, c);
+ RHS_VFMA_M0xN0(5, a, b5, c);
+ RHS_VFMA_M0xN0(6, a, b6, c);
+ RHS_VFMA_M0xN0(7, a, b7, c);
+#endif // K0 > 4
+#if K0 > 8
+ RHS_VFMA_M0xN0(8, a, b8, c);
+ RHS_VFMA_M0xN0(9, a, b9, c);
+ RHS_VFMA_M0xN0(A, a, bA, c);
+ RHS_VFMA_M0xN0(B, a, bB, c);
+ RHS_VFMA_M0xN0(C, a, bC, c);
+ RHS_VFMA_M0xN0(D, a, bD, c);
+ RHS_VFMA_M0xN0(E, a, bE, c);
+ RHS_VFMA_M0xN0(F, a, bF, c);
+#endif // K0 > 8
+
+ lhs_offset += K0 * sizeof(DATA_TYPE);
+ rhs_offset += K0 * rhs_stride_y;
+ }
+
+ // Left-over accumulations
+ for (; i < K; ++i)
+ {
+ // Load values from LHS matrix
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a0 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 0 * lhs_stride_y + zlhs0));
+#if M0 > 1
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a1 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 1 * lhs_stride_y + zlhs1));
+#endif // M0 > 1
+#if M0 > 2
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a2 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 2 * lhs_stride_y + zlhs2));
+#endif // M0 > 2
+#if M0 > 3
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a3 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 3 * lhs_stride_y + zlhs3));
+#endif // M0 > 3
+#if M0 > 4
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a4 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 4 * lhs_stride_y + zlhs4));
+#endif // M0 > 4
+#if M0 > 5
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a5 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 5 * lhs_stride_y + zlhs5));
+#endif // M0 > 5
+#if M0 > 6
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a6 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 6 * lhs_stride_y + zlhs6));
+#endif // M0 > 6
+#if M0 > 7
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a7 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 7 * lhs_stride_y + zlhs7));
+#endif // M0 > 7
+
+ VEC_DATA_TYPE(DATA_TYPE, N0)
+ b = VLOAD(N0)(0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0 * rhs_stride_y));
+ RHS_VFMA_M0xN0(0, a, b, c);
+
+ lhs_offset += sizeof(DATA_TYPE);
+ rhs_offset += rhs_stride_y;
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0);
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * bias_stride_y) +
+ get_global_id(2) * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(M0, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(K) && defined(DATA_TYPE)
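+
+// Illustrative build options for gemm_mm_native (a sketch, not taken from any host code): the
+// values below satisfy the M0/N0/K0 configurations listed in the kernel documentation, e.g.
+//   -DDATA_TYPE=float -DM=64 -DN=32 -DK=128 -DM0=4 -DN0=4 -DK0=4
+// Optional defines such as -DALPHA, -DBETA, -DACTIVATION_TYPE, -DA_VAL and -DB_VAL enable the
+// scaling, bias and activation paths guarded above.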
+
+#if defined(COLS_B) && defined(MULT_TRANSPOSE1XW_WIDTH) && defined(MULT_INTERLEAVE4X4_HEIGHT)
+/** This OpenCL kernel is optimized for Midgard. It computes the matrix multiplication between
+ * matrix A reshaped (src0) and matrix B reshaped (src1)
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16) This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions must also
+ * be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                           Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f32(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 4;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global float *src_addr_a = (__global float *)(src0_ptr + src0_addr_in_bytes);
+ __global float *src_addr_b = (__global float *)(src1_ptr + src1_addr_in_bytes);
+
+ // Compute end row address for matrix B
+ __global float *src_end_addr_b = src_addr_b + COLS_B;
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ float4 c0 = 0.0f;
+ float4 c1 = 0.0f;
+ float4 c2 = 0.0f;
+ float4 c3 = 0.0f;
+
+ for (; src_addr_b <= (src_end_addr_b - (int)(8 * MULT_TRANSPOSE1XW_WIDTH));
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = vload4(0, src_addr_a);
+ float4 b0 = vload4(0, src_addr_b);
+
+ c0 += (float4)a0.s0 * b0;
+ c1 += (float4)a0.s1 * b0;
+ c2 += (float4)a0.s2 * b0;
+ c3 += (float4)a0.s3 * b0;
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a + 4 * MULT_INTERLEAVE4X4_HEIGHT);
+ b0 = vload4(0, src_addr_b + 4 * MULT_TRANSPOSE1XW_WIDTH);
+
+ c0 += (float4)a0.s0 * b0;
+ c1 += (float4)a0.s1 * b0;
+ c2 += (float4)a0.s2 * b0;
+ c3 += (float4)a0.s3 * b0;
+ }
+
+ for (; src_addr_b < src_end_addr_b;
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = vload4(0, src_addr_a);
+ float4 b0 = vload4(0, src_addr_b);
+
+ c0 += (float4)a0.s0 * b0;
+ c1 += (float4)a0.s1 * b0;
+ c2 += (float4)a0.s2 * b0;
+ c3 += (float4)a0.s3 * b0;
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, float, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float));
+
+ LOAD_BLOCK(1, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, float, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x4 block
+ vstore4(c0, 0, (__global float *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore4(c1, 0, (__global float *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore4(c2, 0, (__global float *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore4(c3, 0, (__global float *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
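+
+// Worked example for the REINTERPRET_OUTPUT_AS_3D path above (illustrative numbers only): assume
+// -DHEIGHT_GEMM3D=4 -DDEPTH_GEMM3D=2, cross_plane_pad = 1 and dst_stride_y = 64 bytes. For
+// get_global_id(1) = 1 the tile covers output rows 4..7, so
+//   zout = ((uint4)(0, 1, 2, 3) + (uint4)(4)) / (uint4)4 = (uint4)(1, 1, 1, 1)
+//   zout = min(2 - 1, zout)                              = (uint4)(1, 1, 1, 1)
+//   zout *= (1 * 64)                                     = (uint4)(64, 64, 64, 64)
+// i.e. all four rows of the tile lie in plane 1 and each store skips one padded row of 64 bytes.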
+
+/** This OpenCL kernel is optimized for Bifrost and it computes the matrix multiplication between
+ * matrix A reshaped (src0) and matrix B reshaped (src1)
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16) This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions must also
+ * be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                           Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f32_bifrost(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 4;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global float *src_addr_a = (__global float *)(src0_ptr + src0_addr_in_bytes);
+ __global float *src_addr_b = (__global float *)(src1_ptr + src1_addr_in_bytes);
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ float4 c0 = 0.0f;
+ float4 c1 = 0.0f;
+ float4 c2 = 0.0f;
+ float4 c3 = 0.0f;
+
+#define COLS_MTX_B (COLS_B / (4 * MULT_TRANSPOSE1XW_WIDTH))
+
+ int i = 0;
+ for (; i <= (int)(COLS_MTX_B - 4); i += 4)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = vload4(0, src_addr_a);
+ float4 b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+ }
+
+ for (; i < (int)(COLS_MTX_B); ++i)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = vload4(0, src_addr_a);
+ float4 b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, float, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float));
+
+ LOAD_BLOCK(1, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, float, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x4 block
+ vstore4(c0, 0, (__global float *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore4(c1, 0, (__global float *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore4(c2, 0, (__global float *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore4(c3, 0, (__global float *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
+
+// Undefine local defines
+#undef COLS_MTX_B
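+
+// Loop-count sketch for gemm_mm_interleaved_transposed_f32_bifrost (illustrative numbers only):
+// with -DCOLS_B=64 and -DMULT_TRANSPOSE1XW_WIDTH=2, COLS_MTX_B = 64 / (4 * 2) = 8, so the
+// 4x-unrolled main loop runs for i = 0 and i = 4 (four reshaped-B rows of 4 floats per iteration)
+// and the single-row tail loop has nothing left to do.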
+
+#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+/** This OpenCL kernel computes the matrix multiplication between matrix A reshaped (src0) and
+ * matrix B reshaped (src1)
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16) This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions must also
+ * be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                           Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f16(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 8;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global half *src_addr_a = (__global half *)(src0_ptr + src0_addr_in_bytes);
+ __global half *src_addr_b = (__global half *)(src1_ptr + src1_addr_in_bytes);
+
+ // Compute end row address for matrix B
+ __global half *src_end_addr_b = src_addr_b + COLS_B;
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ half8 c0 = 0.0f;
+ half8 c1 = 0.0f;
+ half8 c2 = 0.0f;
+ half8 c3 = 0.0f;
+
+ for (; src_addr_b <= (src_end_addr_b - (int)(16 * MULT_TRANSPOSE1XW_WIDTH));
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 16 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half4 a0 = vload4(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ c0 += (half8)a0.s0 * b0;
+ c1 += (half8)a0.s1 * b0;
+ c2 += (half8)a0.s2 * b0;
+ c3 += (half8)a0.s3 * b0;
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a + 4 * MULT_INTERLEAVE4X4_HEIGHT);
+ b0 = vload8(0, src_addr_b + 8 * MULT_TRANSPOSE1XW_WIDTH);
+
+ c0 += (half8)a0.s0 * b0;
+ c1 += (half8)a0.s1 * b0;
+ c2 += (half8)a0.s2 * b0;
+ c3 += (half8)a0.s3 * b0;
+ }
+
+ for (; src_addr_b < src_end_addr_b;
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half4 a0 = vload4(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ c0 += (half8)a0.s0 * b0;
+ c1 += (half8)a0.s1 * b0;
+ c2 += (half8)a0.s2 * b0;
+ c3 += (half8)a0.s3 * b0;
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, half, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x8 block
+ vstore8(c0, 0, (__global half *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore8(c1, 0, (__global half *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore8(c2, 0, (__global half *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore8(c3, 0, (__global half *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
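+
+// Stride sketch for the FP16 kernel above (derived from its loads, no extra configuration): each
+// main-loop iteration issues two half4 loads from the interleaved A block and two half8 loads
+// from the transposed B block, hence the pointer advances of 8 * MULT_INTERLEAVE4X4_HEIGHT and
+// 16 * MULT_TRANSPOSE1XW_WIDTH; the tail loop advances by half of each.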
+
+/** This OpenCL kernel computes the matrix multiplication between matrix A reshaped (src0) and
+ * matrix B reshaped (src1) while accumulating the result in a 32-bit floating point variable.
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16) This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions must also
+ * be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                           Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f16_acc32(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 8;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global half *src_addr_a = (__global half *)(src0_ptr + src0_addr_in_bytes);
+ __global half *src_addr_b = (__global half *)(src1_ptr + src1_addr_in_bytes);
+
+ // Compute end row address for matrix B
+ __global half *src_end_addr_b = src_addr_b + COLS_B;
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ float8 c0 = 0.0f;
+ float8 c1 = 0.0f;
+ float8 c2 = 0.0f;
+ float8 c3 = 0.0f;
+
+ for (; src_addr_b <= (src_end_addr_b - (int)(16 * MULT_TRANSPOSE1XW_WIDTH));
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 16 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = convert_float4(vload4(0, src_addr_a));
+ float8 b0 = convert_float8(vload8(0, src_addr_b));
+
+ c0 += (float8)a0.s0 * b0;
+ c1 += (float8)a0.s1 * b0;
+ c2 += (float8)a0.s2 * b0;
+ c3 += (float8)a0.s3 * b0;
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = convert_float4(vload4(0, src_addr_a + 4 * MULT_INTERLEAVE4X4_HEIGHT));
+ b0 = convert_float8(vload8(0, src_addr_b + 8 * MULT_TRANSPOSE1XW_WIDTH));
+
+ c0 += (float8)a0.s0 * b0;
+ c1 += (float8)a0.s1 * b0;
+ c2 += (float8)a0.s2 * b0;
+ c3 += (float8)a0.s3 * b0;
+ }
+
+ for (; src_addr_b < src_end_addr_b;
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = convert_float4(vload4(0, src_addr_a));
+ float8 b0 = convert_float8(vload8(0, src_addr_b));
+
+ c0 += (float8)a0.s0 * b0;
+ c1 += (float8)a0.s1 * b0;
+ c2 += (float8)a0.s2 * b0;
+ c3 += (float8)a0.s3 * b0;
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, float, c, ALPHA);
+#endif // defined(ALPHA)
+
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+ float8 bias_f0 = convert_float8(bias0);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias_f, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias_f0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+ float8 bias_f0 = convert_float8(bias0);
+ float8 bias_f1 = convert_float8(bias1);
+ float8 bias_f2 = convert_float8(bias2);
+ float8 bias_f3 = convert_float8(bias3);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, float, bias_f, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias_f);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+ half8 c_h0 = convert_half8(c0);
+ half8 c_h1 = convert_half8(c1);
+ half8 c_h2 = convert_half8(c2);
+ half8 c_h3 = convert_half8(c3);
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, c_h, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x8 block
+ vstore8(c_h0, 0, (__global half *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore8(c_h1, 0, (__global half *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore8(c_h2, 0, (__global half *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore8(c_h3, 0, (__global half *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
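+
+// Precision note for the _acc32 variant above (a summary of the code, not a behavioural change):
+// A and B tiles are loaded as half, widened with convert_float4/convert_float8, accumulated and
+// optionally scaled/biased in float8, and converted back to half8 only just before the activation
+// and the final vstore8, so no intermediate rounding to FP16 happens inside the K loop.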
+
+/** This OpenCL kernel optimized for Bifrost architectures computes the matrix multiplication
+ * between matrix A reshaped (src0) and matrix B reshaped (src1)
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions must also be
+ * passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported
+ * data types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f16_bifrost(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 8;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global half *src_addr_a = (__global half *)(src0_ptr + src0_addr_in_bytes);
+ __global half *src_addr_b = (__global half *)(src1_ptr + src1_addr_in_bytes);
+
+ // Compute end row address for matrix B
+ __global half *src_end_addr_b = src_addr_b + COLS_B;
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ half8 c0 = 0.0f;
+ half8 c1 = 0.0f;
+ half8 c2 = 0.0f;
+ half8 c3 = 0.0f;
+
+#define COLS_MTX_B (COLS_B / (8 * MULT_TRANSPOSE1XW_WIDTH))
+
+ int i = 0;
+ for (; i <= (int)(COLS_MTX_B - 4); i += 4)
+ {
+#if MULT_INTERLEAVE4X4_HEIGHT == 1
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half8 a0 = vload8(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix B (transposed)
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s4, b0, c0);
+ c1 = fma((half8)a0.s5, b0, c1);
+ c2 = fma((half8)a0.s6, b0, c2);
+ c3 = fma((half8)a0.s7, b0, c3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload8(0, src_addr_a);
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix B (transposed)
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s4, b0, c0);
+ c1 = fma((half8)a0.s5, b0, c1);
+ c2 = fma((half8)a0.s6, b0, c2);
+ c3 = fma((half8)a0.s7, b0, c3);
+#else // MULT_INTERLEAVE4X4_HEIGHT == 1
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half4 a0 = vload4(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+#endif // MULT_INTERLEAVE4X4_HEIGHT == 1
+ }
+
+ for (; i < (int)(COLS_MTX_B); ++i)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half4 a0 = vload4(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated by dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, half, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x8 block
+ vstore8(c0, 0, (__global half *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore8(c1, 0, (__global half *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore8(c2, 0, (__global half *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore8(c3, 0, (__global half *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
+
+// Undefine local defines
+#undef COLS_MTX_B
+
+#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+
+#endif // defined(COLS_B) && defined(MULT_TRANSPOSE1XW_WIDTH) && defined(MULT_INTERLEAVE4X4_HEIGHT)
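+
+#if 0 // Reference-only sketch; never compiled and not used by the kernels above.
+// It spells out, as a plain scalar loop, what the reshaped/interleaved kernels in this block
+// compute: C = ALPHA * (A * B) plus an optional BETA-scaled bias, with the optional activation
+// applied afterwards. The interleaving, transposition, vectorization and 3D reinterpretation are
+// omitted on purpose; alpha, beta and broadcast_bias are plain parameters standing in for the
+// -DALPHA, -DBETA and -DBROADCAST_BIAS build options.
+void reference_gemm(const float *a, const float *b, const float *bias, float *c, int m, int n,
+                    int k, float alpha, float beta, int broadcast_bias)
+{
+  for (int row = 0; row < m; ++row)
+  {
+    for (int col = 0; col < n; ++col)
+    {
+      float acc = 0.0f;
+      for (int i = 0; i < k; ++i)
+        acc += a[row * k + i] * b[i * n + col];
+      acc *= alpha;
+      if (bias)
+        acc += beta * (broadcast_bias ? bias[col] : bias[row * n + col]);
+      c[row * n + col] = acc;
+    }
+  }
+}
+#endif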
+
+#if defined(COLS_A) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && \
+  defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+#if defined(DATA_TYPE)
+#define VECTOR_TYPE VEC_DATA_TYPE(DATA_TYPE, NUM_ELEMS_PROCESSED_PER_THREAD_X)
+/** This OpenCL kernel computes the matrix by matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped.
+ *
+ * @note This OpenCL kernel works with floating point data types (F16/F32)
+ * @note The floating point data type must be passed at compile time using -DDATA_TYPE (e.g.
+ * -DDATA_TYPE=float)
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y
+ * @note The number of matrix A columns and the optional alpha's value need to be passed at compile
+ * time using -DCOLS_A and -DALPHA
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions must also be
+ * passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16/F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported
+ * data types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in]  dst_step_x                         dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in]  dst_step_y                         dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the output tensor (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z, uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and Matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for the matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for the matrix B
+ src_addr.s1 += idx * sizeof(DATA_TYPE);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated by dividing M
+ // (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y) by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ int end_row_vec_a = src_addr.s0 + (COLS_A * sizeof(DATA_TYPE));
+
+ VECTOR_TYPE acc0 = 0.0f;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ VECTOR_TYPE acc1 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ VECTOR_TYPE acc2 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ VECTOR_TYPE acc3 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ for (; src_addr.s0 <= (end_row_vec_a - 2 * (int)sizeof(DATA_TYPE));
+ src_addr += (int2)(2 * sizeof(DATA_TYPE), 2 * src1_stride_y))
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 2, DATA_TYPE, a, src0_ptr, src_addr.s0,
+ src0_stride_y, zin.s);
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a0 = vload2(0, (__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a1 = vload2(0, (__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a2 = vload2(0, (__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a3 = vload2(0, (__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ VECTOR_TYPE b0 =
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global DATA_TYPE *)(src1_ptr + src_addr.s1));
+ VECTOR_TYPE b1 = VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(
+ 0, (__global DATA_TYPE *)(src1_ptr + src_addr.s1 + src1_stride_y));
+
+ // Accumulate
+ acc0 += b0 * (VECTOR_TYPE)a0.s0;
+ acc0 += b1 * (VECTOR_TYPE)a0.s1;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 += b0 * (VECTOR_TYPE)a1.s0;
+ acc1 += b1 * (VECTOR_TYPE)a1.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 += b0 * (VECTOR_TYPE)a2.s0;
+ acc2 += b1 * (VECTOR_TYPE)a2.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 += b0 * (VECTOR_TYPE)a3.s0;
+ acc3 += b1 * (VECTOR_TYPE)a3.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ }
+
+ for (; src_addr.s0 < end_row_vec_a; src_addr += (int2)(sizeof(DATA_TYPE), src1_stride_y))
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ DATA_TYPE a0 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ DATA_TYPE a1 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ DATA_TYPE a2 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ DATA_TYPE a3 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ DATA_TYPE a0 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ DATA_TYPE a1 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ DATA_TYPE a2 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ DATA_TYPE a3 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ VECTOR_TYPE b0 =
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global DATA_TYPE *)(src1_ptr + src_addr.s1));
+
+ // Accumulate
+ acc0 += b0 * (VECTOR_TYPE)a0;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 += b0 * (VECTOR_TYPE)a1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 += b0 * (VECTOR_TYPE)a2;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 += b0 * (VECTOR_TYPE)a3;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated by dividing M
+ // (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, DATA_TYPE, acc, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_X * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, NUM_ELEMS_PROCESSED_PER_THREAD_X, DATA_TYPE, bias, src2_addr, 0, src2_stride_y,
+ zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_X * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, NUM_ELEMS_PROCESSED_PER_THREAD_X, DATA_TYPE, bias,
+ src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, DATA_TYPE, acc, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+ STORE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, NUM_ELEMS_PROCESSED_PER_THREAD_X, DATA_TYPE, acc,
+ dst_addr, dst_stride_y, zout.s);
+}
+#endif // defined(DATA_TYPE)
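+
+#if 0 // Illustrative sketch only; never compiled and not used by the kernels in this file.
+// A scalar view of what one gemm_mm_floating_point work-item computes, assuming the hypothetical
+// build options NUM_ELEMS_PROCESSED_PER_THREAD_X=4 and NUM_ELEMS_PROCESSED_PER_THREAD_Y=4: each
+// work-item owns a 4x4 block of the output and walks the COLS_A dimension once, accumulating
+// acc[row][x] += A[row][i] * B[i][x] before alpha, beta and the activation are applied.
+void one_work_item_block(const float *a, const float *b, float acc[4][4], int cols_a, int ldb)
+{
+  for (int i = 0; i < cols_a; ++i)
+    for (int row = 0; row < 4; ++row)
+      for (int x = 0; x < 4; ++x)
+        acc[row][x] += a[row * cols_a + i] * b[i * ldb + x];
+}
+#endif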
+
+/** This OpenCL kernel computes the matrix by matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped
+ *
+ * @note This OpenCL kernel works with the 32-bit floating point data type (float) and uses the fma
+ * units.
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y. This kernel
+ * optimally uses -DNUM_ELEMS_PROCESSED_PER_THREAD_X=4.
+ * @note The number of matrix A columns must be passed at compile time using -DCOLS_A.
+ * @note The optional value of scalar alpha is passed at compile time using -DALPHA=alpha
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions must also be
+ * passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported
+ * data types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in]  dst_step_x                         dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in]  dst_step_y                         dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point_f32_bifrost(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for matrix B
+ src_addr.s1 += idx * sizeof(float);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated by dividing M
+ // (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y) by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ // Initialize accumulators
+ float4 acc0 = 0.0f;
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float4 acc1 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float4 acc2 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float4 acc3 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // A and B src indices get incremented at the same time.
+ int i = 0;
+ for (; i <= ((int)COLS_A - 4); i += 4)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A and matrix B
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 4, float, a, src0_ptr, src_addr.s0, src0_stride_y,
+ zin.s);
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A and matrix B
+ float4 a0 = vload4(0, (__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float4 a1 = vload4(0, (__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float4 a2 = vload4(0, (__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float4 a3 = vload4(0, (__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s0, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0.s0, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0.s0, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0.s0, b0.s3, acc0.s3);
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+ acc1.s0 = fma(a1.s0, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1.s0, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1.s0, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1.s0, b0.s3, acc1.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+ acc2.s0 = fma(a2.s0, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2.s0, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2.s0, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2.s0, b0.s3, acc2.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ acc3.s0 = fma(a3.s0, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3.s0, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3.s0, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3.s0, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // Load values from matrix A and matrix B
+ b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s1, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0.s1, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0.s1, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0.s1, b0.s3, acc0.s3);
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+ acc1.s0 = fma(a1.s1, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1.s1, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1.s1, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1.s1, b0.s3, acc1.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+ acc2.s0 = fma(a2.s1, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2.s1, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2.s1, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2.s1, b0.s3, acc2.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ acc3.s0 = fma(a3.s1, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3.s1, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3.s1, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3.s1, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // Load values from matrix A and matrix B
+ b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s2, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0.s2, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0.s2, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0.s2, b0.s3, acc0.s3);
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+ acc1.s0 = fma(a1.s2, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1.s2, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1.s2, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1.s2, b0.s3, acc1.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+ acc2.s0 = fma(a2.s2, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2.s2, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2.s2, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2.s2, b0.s3, acc2.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ acc3.s0 = fma(a3.s2, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3.s2, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3.s2, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3.s2, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // Load values from matrix A and matrix B
+ b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s3, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0.s3, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0.s3, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0.s3, b0.s3, acc0.s3);
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+ acc1.s0 = fma(a1.s3, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1.s3, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1.s3, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1.s3, b0.s3, acc1.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+ acc2.s0 = fma(a2.s3, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2.s3, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2.s3, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2.s3, b0.s3, acc2.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ acc3.s0 = fma(a3.s3, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3.s3, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3.s3, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3.s3, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += 4 * sizeof(float);
+ }
+
+ for (; i < (int)COLS_A; ++i)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float a1 = *((__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float a2 = *((__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float a3 = *((__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float a1 = *((__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float a2 = *((__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float a3 = *((__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0, b0.s3, acc0.s3);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1.s0 = fma(a1, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1, b0.s3, acc1.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2.s0 = fma(a2, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2, b0.s3, acc2.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3.s0 = fma(a3, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += sizeof(float);
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated by dividing M
+ // (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, acc, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float));
+
+ LOAD_BLOCK(1, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, float, acc, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store the output block
+ vstore4(acc0, 0, (__global float *)(dst_addr + 0 * dst_stride_y + zout.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ vstore4(acc1, 0, (__global float *)(dst_addr + 1 * dst_stride_y + zout.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ vstore4(acc2, 0, (__global float *)(dst_addr + 2 * dst_stride_y + zout.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ vstore4(acc3, 0, (__global float *)(dst_addr + 3 * dst_stride_y + zout.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+}
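+
+#if 0 // Illustrative sketch only; never compiled and not used by the kernels in this file.
+// The inner loop of the Bifrost f32 kernel above is a chain of rank-1 updates: each scalar taken
+// from a column of A is combined with a 4-wide row of B through fma and added into one row of the
+// 4x4 accumulator block; the main loop unrolls this step 4 times along COLS_A to keep the FMA
+// pipes busy. The helper below shows a single such step for one accumulator row.
+float4 rank1_update_row(float a_scalar, float4 b_row, float4 acc_row)
+{
+  acc_row.s0 = fma(a_scalar, b_row.s0, acc_row.s0);
+  acc_row.s1 = fma(a_scalar, b_row.s1, acc_row.s1);
+  acc_row.s2 = fma(a_scalar, b_row.s2, acc_row.s2);
+  acc_row.s3 = fma(a_scalar, b_row.s3, acc_row.s3);
+  return acc_row;
+}
+#endif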
+
+/** This OpenCL kernel computes the matrix by matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped
+ *
+ * @note This OpenCL kernel works with the 32-bit floating point data type (float) and uses the fma
+ * units. This OpenCL kernel is optimized for Bifrost when the number of matrix B columns is less
+ * than or equal to 1000.
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y. This kernel
+ * optimally uses -DNUM_ELEMS_PROCESSED_PER_THREAD_X=2.
+ * @note The number of matrix A columns must be passed at compile time using -DCOLS_A.
+ * @note The optional value of scalar alpha is passed at compile time using -DALPHA=alpha if
+ * alpha!=1.0f.
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions must also be
+ * passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported
+ * data types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in]  dst_step_x                         dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in]  dst_step_y                         dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point_f32_bifrost_1000(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Requires NUM_ELEMS_PROCESSED_PER_THREAD_X == 2: C is kept in a vec2, A is loaded as vec4 and
+ // B with two vload2 reads. TODO: fix for NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and Matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for the matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for the matrix B
+ src_addr.s1 += idx * sizeof(float);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated by dividing M
+ // (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y) by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ // Initialize accumulators
+ float2 acc0 = 0.0f;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float2 acc1 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float2 acc2 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float2 acc3 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // A and B src indices get incremented at the same time.
+ int i = 0;
+ for (; i <= ((int)COLS_A - 8); i += 8)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float8 a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + zin.s0));
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float8 a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0));
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float2 b0 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b1 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b2 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b3 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b4 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b5 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b6 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b7 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s0, b0.s0, acc0.s0);
+ acc0.s0 = fma(a0.s1, b1.s0, acc0.s0);
+ acc0.s0 = fma(a0.s2, b2.s0, acc0.s0);
+ acc0.s0 = fma(a0.s3, b3.s0, acc0.s0);
+ acc0.s0 = fma(a0.s4, b4.s0, acc0.s0);
+ acc0.s0 = fma(a0.s5, b5.s0, acc0.s0);
+ acc0.s0 = fma(a0.s6, b6.s0, acc0.s0);
+ acc0.s0 = fma(a0.s7, b7.s0, acc0.s0);
+
+ acc0.s1 = fma(a0.s0, b0.s1, acc0.s1);
+ acc0.s1 = fma(a0.s1, b1.s1, acc0.s1);
+ acc0.s1 = fma(a0.s2, b2.s1, acc0.s1);
+ acc0.s1 = fma(a0.s3, b3.s1, acc0.s1);
+ acc0.s1 = fma(a0.s4, b4.s1, acc0.s1);
+ acc0.s1 = fma(a0.s5, b5.s1, acc0.s1);
+ acc0.s1 = fma(a0.s6, b6.s1, acc0.s1);
+ acc0.s1 = fma(a0.s7, b7.s1, acc0.s1);
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+ acc1.s0 = fma(a0.s0, b0.s0, acc1.s0);
+ acc1.s0 = fma(a0.s1, b1.s0, acc1.s0);
+ acc1.s0 = fma(a0.s2, b2.s0, acc1.s0);
+ acc1.s0 = fma(a0.s3, b3.s0, acc1.s0);
+ acc1.s0 = fma(a0.s4, b4.s0, acc1.s0);
+ acc1.s0 = fma(a0.s5, b5.s0, acc1.s0);
+ acc1.s0 = fma(a0.s6, b6.s0, acc1.s0);
+ acc1.s0 = fma(a0.s7, b7.s0, acc1.s0);
+
+ acc1.s1 = fma(a0.s0, b0.s1, acc1.s1);
+ acc1.s1 = fma(a0.s1, b1.s1, acc1.s1);
+ acc1.s1 = fma(a0.s2, b2.s1, acc1.s1);
+ acc1.s1 = fma(a0.s3, b3.s1, acc1.s1);
+ acc1.s1 = fma(a0.s4, b4.s1, acc1.s1);
+ acc1.s1 = fma(a0.s5, b5.s1, acc1.s1);
+ acc1.s1 = fma(a0.s6, b6.s1, acc1.s1);
+ acc1.s1 = fma(a0.s7, b7.s1, acc1.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+ acc2.s0 = fma(a0.s0, b0.s0, acc2.s0);
+ acc2.s0 = fma(a0.s1, b1.s0, acc2.s0);
+ acc2.s0 = fma(a0.s2, b2.s0, acc2.s0);
+ acc2.s0 = fma(a0.s3, b3.s0, acc2.s0);
+ acc2.s0 = fma(a0.s4, b4.s0, acc2.s0);
+ acc2.s0 = fma(a0.s5, b5.s0, acc2.s0);
+ acc2.s0 = fma(a0.s6, b6.s0, acc2.s0);
+ acc2.s0 = fma(a0.s7, b7.s0, acc2.s0);
+
+ acc2.s1 = fma(a0.s0, b0.s1, acc2.s1);
+ acc2.s1 = fma(a0.s1, b1.s1, acc2.s1);
+ acc2.s1 = fma(a0.s2, b2.s1, acc2.s1);
+ acc2.s1 = fma(a0.s3, b3.s1, acc2.s1);
+ acc2.s1 = fma(a0.s4, b4.s1, acc2.s1);
+ acc2.s1 = fma(a0.s5, b5.s1, acc2.s1);
+ acc2.s1 = fma(a0.s6, b6.s1, acc2.s1);
+ acc2.s1 = fma(a0.s7, b7.s1, acc2.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+ acc3.s0 = fma(a0.s0, b0.s0, acc3.s0);
+ acc3.s0 = fma(a0.s1, b1.s0, acc3.s0);
+ acc3.s0 = fma(a0.s2, b2.s0, acc3.s0);
+ acc3.s0 = fma(a0.s3, b3.s0, acc3.s0);
+ acc3.s0 = fma(a0.s4, b4.s0, acc3.s0);
+ acc3.s0 = fma(a0.s5, b5.s0, acc3.s0);
+ acc3.s0 = fma(a0.s6, b6.s0, acc3.s0);
+ acc3.s0 = fma(a0.s7, b7.s0, acc3.s0);
+
+ acc3.s1 = fma(a0.s0, b0.s1, acc3.s1);
+ acc3.s1 = fma(a0.s1, b1.s1, acc3.s1);
+ acc3.s1 = fma(a0.s2, b2.s1, acc3.s1);
+ acc3.s1 = fma(a0.s3, b3.s1, acc3.s1);
+ acc3.s1 = fma(a0.s4, b4.s1, acc3.s1);
+ acc3.s1 = fma(a0.s5, b5.s1, acc3.s1);
+ acc3.s1 = fma(a0.s6, b6.s1, acc3.s1);
+ acc3.s1 = fma(a0.s7, b7.s1, acc3.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += sizeof(float) * 8;
+ }
+  // Left-over loop: process the remaining columns of matrix A one float at a time
+ for (; i < (int)COLS_A; ++i)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float a1 = *((__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float a2 = *((__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float a3 = *((__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float a1 = *((__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float a2 = *((__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float a3 = *((__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float2 b0 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0, b0.s1, acc0.s1);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1.s0 = fma(a1, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1, b0.s1, acc1.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2.s0 = fma(a2, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2, b0.s1, acc2.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3.s0 = fma(a3, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3, b0.s1, acc3.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += sizeof(float);
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, acc, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)2 * sizeof(float));
+
+ LOAD_BLOCK(1, 2, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)2 * sizeof(float)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 2, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, float, acc, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store the output block
+ vstore2(acc0, 0, (__global float *)(dst_addr + 0 * dst_stride_y + zout.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ vstore2(acc1, 0, (__global float *)(dst_addr + 1 * dst_stride_y + zout.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ vstore2(acc2, 0, (__global float *)(dst_addr + 2 * dst_stride_y + zout.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ vstore2(acc3, 0, (__global float *)(dst_addr + 3 * dst_stride_y + zout.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+}
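+
+// Illustrative note (not part of the original kernel): the compile-time contract used by this
+// family of kernels could be satisfied with a hypothetical host-side build-option string such as
+// the one below; the concrete values (256 columns of A, a 2x4 processing tile, alpha = 0.5) are
+// assumptions chosen only for the example.
+//
+//   const char build_opts[] = "-DCOLS_A=256 "
+//                             "-DNUM_ELEMS_PROCESSED_PER_THREAD_X=2 "
+//                             "-DNUM_ELEMS_PROCESSED_PER_THREAD_Y=4 "
+//                             "-DALPHA=0.5f";
+//   // e.g. passed as the options argument of clBuildProgram() when compiling this .cl file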
+
+#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+/** This OpenCL kernel computes the matrix-by-matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped
+ *
+ * @note This OpenCL kernel works with the 16-bit floating point data type (half) and accumulates
+ * the result in a 32-bit floating point variable.
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y. This kernel
+ * optimally uses -DNUM_ELEMS_PROCESSED_PER_THREAD_X=4.
+ * @note The number of matrix A columns must be passed at compile time using -DCOLS_A.
+ * @note The optional value of scalar alpha is passed at compile time using -DALPHA=alpha
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions must be passed
+ * at compile time as well using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ *          (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns of matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point_f16_bifrost_acc32(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and Matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for the matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for the matrix B
+ src_addr.s1 += idx * sizeof(half);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+  // Initialize accumulators (accumulation is performed in 32-bit float for accuracy)
+  float8 acc0 = 0.0f;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+  float8 acc1 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+  float8 acc2 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+  float8 acc3 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ int i = 0;
+ for (; i <= ((int)COLS_A - 4); i += 4)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 4, half, a, src0_ptr, src_addr.s0, src0_stride_y,
+ zin.s);
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half4 a0 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half4 a1 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half4 a2 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half4 a3 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float8 b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+ src_addr.s1 += src1_stride_y;
+
+ // Accumulate
+ acc0 = fma(b0, (float8)a0.s0, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (float8)a1.s0, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (float8)a2.s0, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (float8)a3.s0, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (float8)a0.s1, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (float8)a1.s1, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (float8)a2.s1, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (float8)a3.s1, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (float8)a0.s2, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (float8)a1.s2, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (float8)a2.s2, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (float8)a3.s2, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (float8)a0.s3, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (float8)a1.s3, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (float8)a2.s3, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (float8)a3.s3, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += 4 * sizeof(half);
+ }
+
+ for (; i < (int)COLS_A; ++i)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half a0 = *((__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half a1 = *((__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half a2 = *((__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half a3 = *((__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half a0 = *((__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half a1 = *((__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half a2 = *((__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half a3 = *((__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float8 b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+
+ src_addr += (int2)(sizeof(half), src1_stride_y);
+
+ // Accumulate
+ acc0 = fma(b0, (float8)a0, acc0); // b0 * (half8)a0;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (float8)a1, acc1); // b0 * (half8)a1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (float8)a2, acc2); // b0 * (half8)a2;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (float8)a3, acc3); // b0 * (half8)a3;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, acc, ALPHA);
+#endif // defined(ALPHA)
+
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+ float8 bias_f0 = convert_float8(bias0);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias_f, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias_f0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+ float8 bias_f0 = convert_float8(bias0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float8 bias_f1 = convert_float8(bias1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float8 bias_f2 = convert_float8(bias2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float8 bias_f3 = convert_float8(bias3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, bias_f, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias_f);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+ half8 acc_h0 = convert_half8(acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half8 acc_h1 = convert_half8(acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half8 acc_h2 = convert_half8(acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half8 acc_h3 = convert_half8(acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, half, acc_h, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store the output block
+ STORE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 8, half, acc_h, dst_addr, dst_stride_y, zout.s);
+}
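+
+// Editorial note: unlike gemm_mm_floating_point_f16_bifrost below, this _acc32 variant keeps its
+// accumulators in float8 and only converts back to half8 (convert_half8 above) right before the
+// activation and the final STORE_BLOCK, trading extra conversions for a more accurate accumulation.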
+
+/** This OpenCL kernel computes the matrix-by-matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped
+ *
+ * @note This OpenCL kernel works with the 16-bit floating point data type (half) and uses the fma
+ * units.
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y. This kernel
+ * optimally uses -DNUM_ELEMS_PROCESSED_PER_THREAD_X=4.
+ * @note The number of matrix A columns must be passed at compile time using -DCOLS_A.
+ * @note The optional value of scalar alpha is passed at compile time using -DALPHA=alpha
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions must be passed
+ * at compile time as well using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ *          (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns of matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point_f16_bifrost(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and Matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for the matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for the matrix B
+ src_addr.s1 += idx * sizeof(half);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ half8 acc0 = 0.0h;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half8 acc1 = 0.0h;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half8 acc2 = 0.0h;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half8 acc3 = 0.0h;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ int i = 0;
+ for (; i <= ((int)COLS_A - 4); i += 4)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 4, half, a, src0_ptr, src_addr.s0, src0_stride_y,
+ zin.s);
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half4 a0 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half4 a1 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half4 a2 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half4 a3 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ half8 b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Accumulate
+ acc0 = fma(b0, (half8)a0.s0, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1.s0, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2.s0, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3.s0, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (half8)a0.s1, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1.s1, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2.s1, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3.s1, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (half8)a0.s2, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1.s2, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2.s2, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3.s2, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (half8)a0.s3, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1.s3, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2.s3, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3.s3, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += 4 * sizeof(half);
+ }
+
+ for (; i < (int)COLS_A; ++i)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half a0 = *((__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half a1 = *((__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half a2 = *((__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half a3 = *((__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half a0 = *((__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half a1 = *((__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half a2 = *((__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half a3 = *((__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ half8 b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+
+ src_addr += (int2)(sizeof(half), src1_stride_y);
+
+ // Accumulate
+ acc0 = fma(b0, (half8)a0, acc0); // b0 * (half8)a0;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1, acc1); // b0 * (half8)a1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2, acc2); // b0 * (half8)a2;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3, acc3); // b0 * (half8)a3;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, half, acc, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, half, acc, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store the output block
+ STORE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 8, half, acc, dst_addr, dst_stride_y, zout.s);
+}
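+
+// Illustrative note (assumption, not upstream documentation): when -DBETA is defined, the kernels
+// above take the extra src2 bias arguments (see the #if defined(BETA) blocks in their signatures),
+// -DBROADCAST_BIAS selects the row-broadcast bias path, and defining UNIT_BETA skips the
+// SCALE_BLOCK of the bias. A hypothetical build-option fragment enabling a plain (non-broadcast)
+// bias addition could look like:
+//
+//   "-DBETA=1.0f -DUNIT_BETA"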
+#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+
+#endif // defined(COLS_A) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) &&
+       // defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+
+#if defined(BETA)
+/** This OpenCL kernel performs the in-place matrix addition between 2 matrices taking into account
+ * that the second matrix might be weighted by a scalar value beta.
+ *
+ * @note The value of beta needs to be passed at compile time using -DBETA
+ *
+ * @param[in] src_ptr Pointer to the source matrix. Supported data types:
+ * F32
+ * @param[in] src_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in]  src_stride_z                      Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in]  src_step_z                        src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
+__kernel void gemm_ma_f32(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+ // Load values from A x B
+ float4 alpha_ab = vload4(0, (__global float *)dst.ptr);
+
+ // Load values from Matrix C
+ float4 c = vload4(0, (__global float *)src.ptr);
+
+ // Computes alpha * axb + beta * c
+ float4 out = alpha_ab + (float4)BETA * c;
+
+ // Store final result in axb matrix
+ vstore4(out, 0, (__global float *)dst.ptr);
+}
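+
+// Worked example (illustrative values only): with -DBETA=0.5f, an A*B element of 2.0f already in
+// dst and a matrix C element of 4.0f in src, the kernel stores 2.0f + 0.5f * 4.0f = 4.0f back
+// into dst.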
+
+#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+/** This OpenCL kernel performs the in-place matrix addition between 2 matrices taking into account
+ * that the second matrix might be weighted by a scalar value beta.
+ *
+ * @note The value of beta needs to be passed at compile time using -DBETA
+ *
+ * @param[in] src_ptr Pointer to the source matrix. Supported data types:
+ * F16
+ * @param[in] src_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in]  src_stride_z                      Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in]  src_step_z                        src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
+__kernel void gemm_ma_f16(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+ // Load values from A x B
+ half8 alpha_ab = vload8(0, (__global half *)dst.ptr);
+
+ // Load values from Matrix C
+ half8 c = vload8(0, (__global half *)src.ptr);
+
+ // Computes alpha * axb + beta * c
+ half8 out = alpha_ab + (half8)BETA * c;
+
+ // Store final result in axb matrix
+ vstore8(out, 0, (__global half *)dst.ptr);
+}
+#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+#endif // defined(BETA)
+
+#if defined(WIDTH_VECTOR_A)
+/** This OpenCL kernel computes the vector-by-matrix multiplication between each row of A (src0) and
+ * matrix B (src1), used for the locally connected layer
+ *
+ * @note The width of A needs to be passed at compile time using -DWIDTH_VECTOR_A
+ *
+ * @note The input A and matrix B must not be reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
+__kernel void gemm_lc_vm_f32(IMAGE_DECLARATION(src0), TENSOR3D_DECLARATION(src1),
+ IMAGE_DECLARATION(dst))
+{
+ int idx = get_global_id(0) * 4;
+ int idy = get_global_id(1);
+
+ // Compute the address for the vector A and matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes + src0_stride_y * idy,
+ src1_offset_first_element_in_bytes + src1_stride_z * idy));
+ src_addr.s1 += idx * sizeof(float);
+
+ int end_row_vec_a = src_addr.s0 + (WIDTH_VECTOR_A * sizeof(float));
+
+ float4 acc = 0.0f;
+
+ for (; src_addr.s0 <= (end_row_vec_a - 2 * (int)sizeof(float));
+ src_addr += (int2)(2 * sizeof(float), 2 * src1_stride_y))
+ {
+ float2 a0 = vload2(0, (__global float *)(src0_ptr + src_addr.s0));
+ float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ float4 b1 = vload4(0, (__global float *)(src1_ptr + src_addr.s1 + src1_stride_y));
+
+ acc += b0 * (float4)a0.s0;
+ acc += b1 * (float4)a0.s1;
+ }
+
+ for (; src_addr.s0 < end_row_vec_a; src_addr += (int2)(sizeof(float), src1_stride_y))
+ {
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0));
+ float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+
+ acc += b0 * (float4)a0;
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ vstore4(acc, 0, (__global float *)(offset(&dst, 0, 0)));
+}
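+
+// Illustrative trace (hypothetical -DWIDTH_VECTOR_A=5): the first loop consumes the columns of the
+// A row in pairs, (0,1) and (2,3), reading a float4 of B per column, and the left-over loop then
+// handles the remaining column 4 with scalar loads of A.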
+#endif // defined(WIDTH_VECTOR_A)
+
+/** This kernel accumulates each row with the biases vector.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=short.
+ * @note The vector size must be passed at compile time using -DVECTOR_SIZE e.g. -DVECTOR_SIZE=16.
+ *
+ * @param[in, out] accum_ptr Pointer to the accumulate tensor. Supported
+ * data type: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in]      accum_stride_x                     Stride of the accumulate tensor in X
+ * dimension (in bytes)
+ * @param[in] accum_step_x accum_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in]      accum_stride_y                     Stride of the accumulate tensor in Y
+ * dimension (in bytes)
+ * @param[in]      accum_step_y                       accum_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] accum_offset_first_element_in_bytes The offset of the first element in the
+ * accumulate tensor
+ * @param[in] biases_ptr Pointer to the biases vector. Same as @p
+ * accum_ptr
+ * @param[in]      biases_stride_x                    Stride of the biases vector in X
+ * dimension (in bytes)
+ * @param[in]      biases_step_x                      biases_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the
+ * biases vector
+ */
+#if defined(DATA_TYPE) && defined(VECTOR_SIZE)
+__kernel void gemm_accumulate_biases(IMAGE_DECLARATION(accum), VECTOR_DECLARATION(biases))
+{
+ Image accum = CONVERT_TO_IMAGE_STRUCT(accum);
+ Vector biases = CONVERT_TO_VECTOR_STRUCT(biases);
+
+  // Load VECTOR_SIZE elements from the accumulate tensor and the biases vector
+ VEC_DATA_TYPE(DATA_TYPE, VECTOR_SIZE)
+ accum_value = VLOAD(VECTOR_SIZE)(0, (__global DATA_TYPE *)accum.ptr);
+ VEC_DATA_TYPE(DATA_TYPE, VECTOR_SIZE)
+ biases_value = VLOAD(VECTOR_SIZE)(0, (__global DATA_TYPE *)biases.ptr);
+ accum_value = biases_value + accum_value;
+ // Store result in the accumulate buffer
+ VSTORE(VECTOR_SIZE)
+ (accum_value, 0, (__global DATA_TYPE *)accum.ptr);
+}
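+
+// Illustrative note: a hypothetical build-option string satisfying the guard above, e.g. for an
+// F32 accumulate buffer processed 16 elements at a time:
+//
+//   "-DDATA_TYPE=float -DVECTOR_SIZE=16"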
+#endif // defined(DATA_TYPE) && defined(VECTOR_SIZE)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "activation_float_helpers.h"
+#include "helpers.h"
+
+/** Loads the rows from 0 to n-1 in the given variables (BASENAME0 to BASENAMEn-1).
+ * @name LOAD_ROW_n
+ *
+ * @param[in] N0 The number of rows to load
+ * @param[in] DATA_TYPE The data type of variables
+ * @param[in] BASENAME The basename of the destination variables for the loaded rows
+ * @param[in] PTR The base pointer
+ * @param[in] OFFSET The offset within a row
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The z-axis offset vector
+ * @{
+ */
+#define LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##0 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 0 * STRIDE_Y + Z##0));
+
+#define LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##1 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 1 * STRIDE_Y + Z##1));
+
+#define LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##2 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 2 * STRIDE_Y + Z##2));
+
+#define LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##3 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 3 * STRIDE_Y + Z##3));
+
+#define LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##4 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 4 * STRIDE_Y + Z##4));
+
+#define LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##5 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 5 * STRIDE_Y + Z##5));
+
+#define LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##6 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 6 * STRIDE_Y + Z##6));
+
+#define LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##7 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 7 * STRIDE_Y + Z##7));
+
+#define LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##8 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 8 * STRIDE_Y + Z##8));
+
+#define LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##9 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 9 * STRIDE_Y + Z##9));
+
+#define LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##A = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 10 * STRIDE_Y + Z##A));
+
+#define LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##B = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 11 * STRIDE_Y + Z##B));
+
+#define LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##C = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 12 * STRIDE_Y + Z##C));
+
+#define LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##D = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 13 * STRIDE_Y + Z##D));
+
+#define LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##E = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 14 * STRIDE_Y + Z##E));
+
+#define LOAD_ROW_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##F = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 15 * STRIDE_Y + Z##F));
+
+/** @}*/ // end of group LOAD_ROW_n
+
+/** Load Blocks (consecutive rows and columns) with Z offset.
+ * @name LOAD_BLOCK
+ *
+ * Supported cases are M0=1,2,3,...,16 and N0=1,2,3,4,8,16
+ * The data to load is expected to have consecutive names for each row.
+ * E.g., for M0=3, and BASENAME=c, the expected data is c0, c1 and c2.
+ * The Z offset is expected to have consecutive names.
+ * E.g., for M0=3, and Z=zin, the expected Z offsets are zin0, zin1 and zin2.
+ *
+ * @param[in] M0 The number of consecutive rows
+ * @param[in] N0 The number of consecutive columns
+ * @param[in] DATA_TYPE The data type of the target
+ * @param[in] BASENAME The basename of the result variables
+ * @param[in] PTR The base pointer for the data
+ * @param[in] OFFSET The offset within a row
+ * @param[in] STRIDE_Y The stride in y-axis direction
+ * @param[in] Z The z-axis offset vector
+ * @{
+ */
+#define LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
+#define LOAD_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
+/** @} */ // end of group LOAD_BLOCK
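+
+// Illustrative expansion (not part of the original helpers, shown only as a hedged sketch):
+// assuming zin0 and zin1 have been computed beforehand,
+//   LOAD_BLOCK(2, 4, float, a, src_ptr, 0, src_stride_y, zin)
+// expands roughly to
+//   float4 a0 = vload4(0, (__global float *)(src_ptr + 0 + 0 * src_stride_y + zin0));
+//   float4 a1 = vload4(0, (__global float *)(src_ptr + 0 + 1 * src_stride_y + zin1));
+// where src_ptr, src_stride_y and zin are hypothetical caller-side names.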
+
+/** Loads the elements from 0 to n-1 in the given variables (BASENAME0 to BASENAMEn-1).
+ * @name LOAD_ELEMENT_n
+ *
+ * @param[in] N0 The width of the destination vectors
+ * @param[in] DATA_TYPE The data type of variables
+ * @param[in] BASENAME The basename of the destination variables for the loaded rows
+ * @param[in] PTR The base pointer
+ * @param[in] OFFSET The offset within a row
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @{
+ */
+#define LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##0 = *((__global DATA_TYPE *)(PTR + OFFSET + 0 * STRIDE_Y));
+
+#define LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##1 = *((__global DATA_TYPE *)(PTR + OFFSET + 1 * STRIDE_Y));
+
+#define LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##2 = *((__global DATA_TYPE *)(PTR + OFFSET + 2 * STRIDE_Y));
+
+#define LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##3 = *((__global DATA_TYPE *)(PTR + OFFSET + 3 * STRIDE_Y));
+
+#define LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##4 = *((__global DATA_TYPE *)(PTR + OFFSET + 4 * STRIDE_Y));
+
+#define LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##5 = *((__global DATA_TYPE *)(PTR + OFFSET + 5 * STRIDE_Y));
+
+#define LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##6 = *((__global DATA_TYPE *)(PTR + OFFSET + 6 * STRIDE_Y));
+
+#define LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##7 = *((__global DATA_TYPE *)(PTR + OFFSET + 7 * STRIDE_Y));
+
+#define LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##8 = *((__global DATA_TYPE *)(PTR + OFFSET + 8 * STRIDE_Y));
+
+#define LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##9 = *((__global DATA_TYPE *)(PTR + OFFSET + 9 * STRIDE_Y));
+
+#define LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##A = *((__global DATA_TYPE *)(PTR + OFFSET + 10 * STRIDE_Y));
+
+#define LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##B = *((__global DATA_TYPE *)(PTR + OFFSET + 11 * STRIDE_Y));
+
+#define LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##C = *((__global DATA_TYPE *)(PTR + OFFSET + 12 * STRIDE_Y));
+
+#define LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##D = *((__global DATA_TYPE *)(PTR + OFFSET + 13 * STRIDE_Y));
+
+#define LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##E = *((__global DATA_TYPE *)(PTR + OFFSET + 14 * STRIDE_Y));
+
+#define LOAD_ELEMENT_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##F = *((__global DATA_TYPE *)(PTR + OFFSET + 15 * STRIDE_Y));
+
+/** @}*/ // end of group LOAD_ELEMENT_n
+
+/** Load Scalar as Vector (consecutive elements).
+ * @name LOAD_SCALAR_AS_VECTOR
+ *
+ * Supported cases are M0=1,2,3,...,16 and N0=1,2,3,4,8,16
+ * The data to load is expected to have consecutive names for each row.
+ * E.g., for M0=3, and BASENAME=c, the expected data is c0, c1 and c2.
+ *
+ * @param[in] M0 The number of consecutive rows
+ * @param[in] N0 The number of consecutive columns
+ * @param[in] DATA_TYPE The data type of the target
+ * @param[in] BASENAME The basename of the result variables
+ * @param[in] PTR The base pointer for the data
+ * @param[in] OFFSET The offset within a row
+ * @param[in] STRIDE_Y The stride in y-axis direction
+ * @{
+ */
+#define LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
+#define LOAD_SCALAR_AS_VECTOR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
+/** @} */ // end of group LOAD_SCALAR_AS_VECTOR
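+
+// Illustrative sketch (not part of the original helpers):
+//   LOAD_SCALAR_AS_VECTOR(2, 4, float, a, src_ptr, 0, src_stride_y)
+// expands roughly to
+//   float4 a0 = *((__global float *)(src_ptr + 0 + 0 * src_stride_y));
+//   float4 a1 = *((__global float *)(src_ptr + 0 + 1 * src_stride_y));
+// i.e. each row contributes one scalar, which OpenCL widens to the declared vector type.
+// src_ptr and src_stride_y are hypothetical caller-side names.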
+
+/** Basic macros to calculate Z offset values from Z0 to Zn-1
+ * @name CALCULATE_Z_OFFSET_n
+ *
+ * @param[in] M0 The number of offset values to calculate
+ * @param[in] DATA_TYPE The data type of the results
+ * @param[in] Z The basename of the result variables
+ * @param[in] Y The work-item ID of y-axis
+ * @param[in] HEIGHT_GEMM3D The height of GEMM3D
+ * @param[in] DEPTH_GEMM3D The depth of GEMM3D
+ * @param[in] CROSS_PLANE_PAD The padding required for plane changes across the z-dimension
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ *
+ * @{
+ */
+#define CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##0 = (0 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##0 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##0); \
+ Z##0 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##1 = (1 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##1 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##1); \
+ Z##1 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##2 = (2 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##2 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##2); \
+ Z##2 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##3 = (3 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##3 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##3); \
+ Z##3 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##4 = (4 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##4 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##4); \
+ Z##4 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##5 = (5 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##5 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##5); \
+ Z##5 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##6 = (6 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##6 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##6); \
+ Z##6 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_8(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##7 = (7 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##7 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##7); \
+ Z##7 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+/** @} */ // end of group CALCULATE_Z_OFFSET_n
+
+/** Calculate Z offset values from Z0 to Zn-1
+ * @name CALCULATE_Z_OFFSET
+ *
+ * The Z offsets are expected to have consecutive names.
+ * E.g., for M0=3 and Z=zin, the expected names of Z offsets are zin0, zin1 and zin2.
+ * Note that CROSS_PLANE_PAD (cross plane padding) is required to take into account
+ * the possible cross plane paddings in case the plane changes across the z-dimension.
+ *
+ * <!--
+ * | |
+ * | plane0 |
+ * | |
+ * |__________________|
+ * |******************|
+ * | cross_plane_pad |
+ * |******************|
+ * | |
+ * | plane1 |
+ * | |
+ * |__________________|
+ * -->
+ *
+ * @param[in] M0 The number of offset values to calculate
+ * @param[in] DATA_TYPE The data type of the results
+ * @param[in] Z The basename of the result variables
+ * @param[in] Y The work-item ID of y-axis
+ * @param[in] HEIGHT_GEMM3D The height of GEMM3D
+ * @param[in] DEPTH_GEMM3D The depth of GEMM3D
+ * @param[in] CROSS_PLANE_PAD The padding required for plane changes across the z-dimension
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @{
+ */
+#define CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_##M0(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y)
+#define CALCULATE_Z_OFFSET(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y)
+/** @} */ // end of group CALCULATE_Z_OFFSET
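+
+// Illustrative sketch (not part of the original helpers): assuming zin0 and zin1 were declared
+// earlier by the caller, CALCULATE_Z_OFFSET(2, uint, zin, y, H, D, pad, stride_y) expands
+// roughly to
+//   zin0 = (0 + (uint)(y * (uint)2)) / (uint)H;
+//   zin0 = min((uint)(D - 1), zin0);
+//   zin0 *= (pad * stride_y);
+// plus the analogous three statements for zin1; y, H, D, pad and stride_y are placeholders.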
+
+/** Store the 0th to (n-1)th rows of the given variables
+ * @name STORE_ROW_n
+ *
+ * @param[in] N0 The size of the vectors
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] PTR The base pointer
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The offset in z-axis direction
+ * @{
+ */
+#define STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##0, 0, (__global DATA_TYPE *)(PTR + 0 * STRIDE_Y + Z##0));
+
+#define STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##1, 0, (__global DATA_TYPE *)(PTR + 1 * STRIDE_Y + Z##1));
+
+#define STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##2, 0, (__global DATA_TYPE *)(PTR + 2 * STRIDE_Y + Z##2));
+
+#define STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##3, 0, (__global DATA_TYPE *)(PTR + 3 * STRIDE_Y + Z##3));
+
+#define STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##4, 0, (__global DATA_TYPE *)(PTR + 4 * STRIDE_Y + Z##4));
+
+#define STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##5, 0, (__global DATA_TYPE *)(PTR + 5 * STRIDE_Y + Z##5));
+
+#define STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##6, 0, (__global DATA_TYPE *)(PTR + 6 * STRIDE_Y + Z##6));
+
+#define STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##7, 0, (__global DATA_TYPE *)(PTR + 7 * STRIDE_Y + Z##7));
+
+#define STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##8, 0, (__global DATA_TYPE *)(PTR + 8 * STRIDE_Y + Z##8));
+
+#define STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##9, 0, (__global DATA_TYPE *)(PTR + 9 * STRIDE_Y + Z##9));
+
+#define STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##A, 0, (__global DATA_TYPE *)(PTR + 10 * STRIDE_Y + Z##A));
+
+#define STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##B, 0, (__global DATA_TYPE *)(PTR + 11 * STRIDE_Y + Z##B));
+
+#define STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##C, 0, (__global DATA_TYPE *)(PTR + 12 * STRIDE_Y + Z##C));
+
+#define STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##D, 0, (__global DATA_TYPE *)(PTR + 13 * STRIDE_Y + Z##D));
+
+#define STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##E, 0, (__global DATA_TYPE *)(PTR + 14 * STRIDE_Y + Z##E));
+
+#define STORE_ROW_16(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##F, 0, (__global DATA_TYPE *)(PTR + 15 * STRIDE_Y + Z##F));
+/** @} */ // end of group STORE_ROW_n
+
+/** Convert and store the 0th to (n-1)th rows of the given variables
+ * @name CONVERT_STORE_ROW_n
+ *
+ * @param[in] N0 The size of the vectors
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] PTR The base pointer
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The offset in z-axis direction
+ * @{
+ */
+#define CONVERT_STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##0), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 0 * STRIDE_Y + Z##0));
+
+#define CONVERT_STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##1), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 1 * STRIDE_Y + Z##1));
+
+#define CONVERT_STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##2), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 2 * STRIDE_Y + Z##2));
+
+#define CONVERT_STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##3), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 3 * STRIDE_Y + Z##3));
+
+#define CONVERT_STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##4), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 4 * STRIDE_Y + Z##4));
+
+#define CONVERT_STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##5), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 5 * STRIDE_Y + Z##5));
+
+#define CONVERT_STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##6), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 6 * STRIDE_Y + Z##6));
+
+#define CONVERT_STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##7), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 7 * STRIDE_Y + Z##7));
+
+#define CONVERT_STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##8), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 8 * STRIDE_Y + Z##8));
+
+#define CONVERT_STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)                  \
+ CONVERT_STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##9), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 9 * STRIDE_Y + Z##9));
+
+#define CONVERT_STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##A), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 10 * STRIDE_Y + Z##A));
+
+#define CONVERT_STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##B), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 11 * STRIDE_Y + Z##B));
+
+#define CONVERT_STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##C), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 12 * STRIDE_Y + Z##C));
+
+#define CONVERT_STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##D), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 13 * STRIDE_Y + Z##D));
+
+#define CONVERT_STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##E), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 14 * STRIDE_Y + Z##E));
+
+#define CONVERT_STORE_ROW_16(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##F), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 15 * STRIDE_Y + Z##F));
+
+/** @} */ // end of group CONVERT_STORE_ROW_n
+
+/** Store a block of the given size M0xN0
+ * @name STORE_BLOCK
+ *
+ * Supported cases are M0=1,2,3,...,16 and N0=2,3,4,8,16.
+ * The data to store is expected to have consecutive names for each row.
+ * E.g., for M0=3 and basename=c, the expected names are c0, c1 and c2.
+ * The Z offset is expected to have consecutive names.
+ * E.g., for M0=3 and Z=zin, the expected z offset names are zin0, zin1 and zin2.
+ *
+ * @param[in] M0 The number of rows to store
+ * @param[in] N0 The size of each vector
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] PTR The base pointer
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The offset in z-axis direction
+ * @{
+ */
+#define STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
+#define STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
+/** @} */ // end of group STORE_BLOCK
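+
+// Illustrative sketch (not part of the original helpers): with hypothetical names dst_ptr,
+// dst_stride_y and zout, STORE_BLOCK(2, 4, float, c, dst_ptr, dst_stride_y, zout) expands
+// roughly to
+//   vstore4(c0, 0, (__global float *)(dst_ptr + 0 * dst_stride_y + zout0));
+//   vstore4(c1, 0, (__global float *)(dst_ptr + 1 * dst_stride_y + zout1));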
+
+/** Convert and store a block of the given size M0xN0
+ * @name CONVERT_STORE_BLOCK
+ *
+ * Supported cases are M0=1,2,3,...,16 and N0=2,3,4,8,16.
+ * The data to store is expected to have consecutive names for each row.
+ * E.g., for M0=3 and basename=c, the expected names are c0, c1 and c2.
+ * The Z offset is expected to have consecutive names.
+ * E.g., for M0=3 and Z=zin, the expected z offset names are zin0, zin1 and zin2.
+ *
+ * @param[in] M0 The number of rows to store
+ * @param[in] N0 The size of each vector
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] PTR The base pointer
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The offset in z-axis direction
+ * @{
+ */
+#define CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
+#define CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
+/** @} */ // end of group CONVERT_STORE_BLOCK
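+
+// Illustrative sketch (not part of the original helpers): CONVERT_STORE_BLOCK behaves like
+// STORE_BLOCK above but saturate-converts each row first, e.g.
+//   CONVERT_STORE_BLOCK(2, 4, uchar, c, dst_ptr, dst_stride_y, zout)
+// expands roughly to
+//   vstore4(convert_uchar4_sat(c0), 0, (__global uchar *)(dst_ptr + 0 * dst_stride_y + zout0));
+//   vstore4(convert_uchar4_sat(c1), 0, (__global uchar *)(dst_ptr + 1 * dst_stride_y + zout1));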
+
+/** Scale the rows in the given variables (BASENAME0 to BASENAMEn-1)
+ * @name SCALE_ROW_n
+ *
+ * @param[in] DATA_TYPE The data type of the variables
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] SCALE The scale factor
+ * @{
+ */
+#define SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) BASENAME##0 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##1 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##2 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##3 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##4 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##5 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##6 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##7 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##8 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##9 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##A *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##B *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##C *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##D *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##E *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_16(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##F *= (DATA_TYPE)SCALE;
+/** @} */ // end of group SCALE_ROW_n
+
+/** Scale elements stored in a block (BASENAME)
+ * @name SCALE_BLOCK
+ *
+ * Supported cases are N=1,2,3,...,16
+ *
+ * @param[in] N The number of rows in the block
+ * @param[in] DATA_TYPE The data type of the block
+ * @param[in] BASENAME The basename of the block
+ * @param[in] SCALE The scale factor
+ * @{
+ */
+#define SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE) SCALE_ROW_##N(DATA_TYPE, BASENAME, SCALE)
+#define SCALE_BLOCK(N, DATA_TYPE, BASENAME, SCALE) SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE)
+/** @} */ // end of group SCALE_BLOCK
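+
+// Illustrative sketch (not part of the original helpers): SCALE_BLOCK(3, float, c, ALPHA)
+// expands to
+//   c0 *= (float)ALPHA;
+//   c1 *= (float)ALPHA;
+//   c2 *= (float)ALPHA;
+// where ALPHA stands for whatever scale factor the caller passes.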
+
+/** Create a new vector containing the values at the given index for a set of given vectors
+ * @name COLUMN_VECTORn
+ *
+ * @param[in] IDX_COL The index value
+ * @param[in] BASENAME The basename of the destination vectors
+ * @param[in] X The basename of the source vectors
+ * @param[in] TYPE The data type of the destination vectors
+ * @{
+ */
+#define COLUMN_VECTOR1(IDX_COL, BASENAME, X, TYPE) \
+ TYPE BASENAME##IDX_COL = (TYPE)((X##0).s##IDX_COL);
+#define COLUMN_VECTOR2(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 2) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0).s##IDX_COL, (X##1).s##IDX_COL);
+#define COLUMN_VECTOR3(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 3) \
+ BASENAME##IDX_COL = \
+ (VEC_DATA_TYPE(TYPE, 3))((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL);
+#define COLUMN_VECTOR4(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 4) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0).s##IDX_COL, (X##1).s##IDX_COL, \
+ (X##2).s##IDX_COL, (X##3).s##IDX_COL);
+#define COLUMN_VECTOR8(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 8) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 8))( \
+ (X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, (X##4).s##IDX_COL, \
+ (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL);
+#define COLUMN_VECTOR16(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 16) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 16))( \
+ (X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, (X##4).s##IDX_COL, \
+ (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL, (X##8).s##IDX_COL, (X##9).s##IDX_COL, \
+ (X##A).s##IDX_COL, (X##B).s##IDX_COL, (X##C).s##IDX_COL, (X##D).s##IDX_COL, (X##E).s##IDX_COL, \
+ (X##F).s##IDX_COL);
+/** @} */ // end of group COLUMN_VECTORn
+
+/** Create a new vector containing the values at the given index. Utility macros for transposing a
+ * column-vector
+ * @name COLUMN_VECTOR_SCALARn
+ *
+ * @param[in] IDX_COL The index value
+ * @param[in] BASENAME The basename of the destination vectors
+ * @param[in] X The basename of the source vectors
+ * @param[in] TYPE The data type of the destination vectors
+ * @{
+ */
+#define COLUMN_VECTOR_SCALAR1(IDX_COL, BASENAME, X, TYPE) TYPE BASENAME##IDX_COL = (TYPE)((X##0));
+#define COLUMN_VECTOR_SCALAR2(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 2) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0), (X##1));
+#define COLUMN_VECTOR_SCALAR3(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 3) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 3))((X##0), (X##1), (X##2));
+#define COLUMN_VECTOR_SCALAR4(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 4) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0), (X##1), (X##2), (X##3));
+#define COLUMN_VECTOR_SCALAR8(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 8) \
+ BASENAME##IDX_COL = \
+ (VEC_DATA_TYPE(TYPE, 8))((X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7));
+#define COLUMN_VECTOR_SCALAR16(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 16) \
+ BASENAME##IDX_COL = \
+ (VEC_DATA_TYPE(TYPE, 16))((X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7), \
+ (X##8), (X##9), (X##A), (X##B), (X##C), (X##D), (X##E), (X##F));
+/** @} */ // end of group COLUMN_VECTORn
+
+/** Create transposed vectors of the given vectors
+ * @name TRANSPOSE_K0Xn
+ *
+ * @param[in] K0 The size of the source vectors
+ * @param[in] BASENAME The basename of transposed vectors
+ * @param[in] B The basename of source vectors for transposition
+ * @param[in] TYPE The data type of the transposed vectors
+ * @{
+ */
+#define TRANSPOSE_K0X1(K0, BASENAME, B, TYPE) COLUMN_VECTOR_SCALAR(K0, 0, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X2(K0, BASENAME, B, TYPE) \
+ COLUMN_VECTOR(K0, 0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 1, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X3(K0, BASENAME, B, TYPE) \
+ TRANSPOSE_K0X2(K0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 2, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X4(K0, BASENAME, B, TYPE) \
+ TRANSPOSE_K0X3(K0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 3, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X8(K0, BASENAME, B, TYPE) \
+ TRANSPOSE_K0X4(K0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 4, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 5, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 6, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 7, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X16(K0, BASENAME, B, TYPE) \
+ TRANSPOSE_K0X8(K0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 8, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 9, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, A, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, B, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, C, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, D, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, E, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, F, BASENAME, B, TYPE);
+
+/** @} */ // end of group TRANSPOSE_K0Xn
+
+/** Create column vectors to contain the values at the given index for a set of given vectors
+ *
+ * @param[in] K0 The number of source vectors
+ * @param[in] IDX_COL The index value
+ * @param[in] BASENAME The basename of the destination vectors
+ * @param[in] B The basename of the source vectors
+ * @param[in] TYPE The data type of the destination vectors
+ */
+#define COLUMN_VECTOR(K0, IDX_COL, BASENAME, B, TYPE) \
+ CONCAT(COLUMN_VECTOR, K0) \
+ (IDX_COL, BASENAME, B, TYPE);
+
+/** Create column vectors to contain the values at the given index. Utility macro for transposing a
+ * column-vector
+ *
+ * @param[in] K0 The number of source vectors
+ * @param[in] IDX_COL The index value
+ * @param[in] BASENAME The basename of the destination vectors
+ * @param[in] B The basename of the source vectors
+ * @param[in] TYPE The data type of the destination vectors
+ */
+#define COLUMN_VECTOR_SCALAR(K0, IDX_COL, BASENAME, B, TYPE) \
+ CONCAT(COLUMN_VECTOR_SCALAR, K0) \
+ (IDX_COL, BASENAME, B, TYPE);
+
+/** Create transposed vectors from the given source vectors
+ *
+ * @param[in] K0 The size of source vectors
+ * @param[in] N0 The number of source vectors
+ * @param[in] BASENAME The basename of transposed vectors
+ * @param[in] B The basename of source vectors for transposition
+ * @param[in] TYPE The data type of the transposed vectors
+ *
+ */
+#define TRANSPOSE_K0XN0(K0, N0, BASENAME, B, TYPE) \
+ CONCAT(TRANSPOSE_K0X, N0) \
+ (K0, BASENAME, B, TYPE);
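+
+// Illustrative sketch (not part of the original helpers): given two uchar4 vectors b0 and b1,
+// TRANSPOSE_K0XN0(2, 4, b_t, b, uchar) expands roughly to
+//   uchar2 b_t0 = (uchar2)((b0).s0, (b1).s0);
+//   uchar2 b_t1 = (uchar2)((b0).s1, (b1).s1);
+//   uchar2 b_t2 = (uchar2)((b0).s2, (b1).s2);
+//   uchar2 b_t3 = (uchar2)((b0).s3, (b1).s3);
+// i.e. the components of b0 and b1 are regrouped column-wise; b_t is a hypothetical basename.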
+
+/** Add the variables (BIAS0 to BIASn-1) to the others (BASENAME0 to BASENAMEn-1)
+ * @name ADD_ROW_n
+ *
+ * @param[in] BASENAME The basename of the destination variables
+ * @param[in] BIAS The basename of the added variables
+ * @{
+ */
+#define ADD_ROW_1(BASENAME, BIAS) BASENAME##0 += BIAS##0;
+
+#define ADD_ROW_2(BASENAME, BIAS) \
+ ADD_ROW_1(BASENAME, BIAS) \
+ BASENAME##1 += BIAS##1;
+
+#define ADD_ROW_3(BASENAME, BIAS) \
+ ADD_ROW_2(BASENAME, BIAS) \
+ BASENAME##2 += BIAS##2;
+
+#define ADD_ROW_4(BASENAME, BIAS) \
+ ADD_ROW_3(BASENAME, BIAS) \
+ BASENAME##3 += BIAS##3;
+
+#define ADD_ROW_5(BASENAME, BIAS) \
+ ADD_ROW_4(BASENAME, BIAS) \
+ BASENAME##4 += BIAS##4;
+
+#define ADD_ROW_6(BASENAME, BIAS) \
+ ADD_ROW_5(BASENAME, BIAS) \
+ BASENAME##5 += BIAS##5;
+
+#define ADD_ROW_7(BASENAME, BIAS) \
+ ADD_ROW_6(BASENAME, BIAS) \
+ BASENAME##6 += BIAS##6;
+
+#define ADD_ROW_8(BASENAME, BIAS) \
+ ADD_ROW_7(BASENAME, BIAS) \
+ BASENAME##7 += BIAS##7;
+
+#define ADD_ROW_9(BASENAME, BIAS) \
+ ADD_ROW_8(BASENAME, BIAS) \
+ BASENAME##8 += BIAS##8;
+
+#define ADD_ROW_10(BASENAME, BIAS) \
+ ADD_ROW_9(BASENAME, BIAS) \
+ BASENAME##9 += BIAS##9;
+
+#define ADD_ROW_11(BASENAME, BIAS) \
+ ADD_ROW_10(BASENAME, BIAS) \
+ BASENAME##A += BIAS##A;
+
+#define ADD_ROW_12(BASENAME, BIAS) \
+ ADD_ROW_11(BASENAME, BIAS) \
+ BASENAME##B += BIAS##B;
+
+#define ADD_ROW_13(BASENAME, BIAS) \
+ ADD_ROW_12(BASENAME, BIAS) \
+ BASENAME##C += BIAS##C;
+
+#define ADD_ROW_14(BASENAME, BIAS) \
+ ADD_ROW_13(BASENAME, BIAS) \
+ BASENAME##D += BIAS##D;
+
+#define ADD_ROW_15(BASENAME, BIAS) \
+ ADD_ROW_14(BASENAME, BIAS) \
+ BASENAME##E += BIAS##E;
+
+#define ADD_ROW_16(BASENAME, BIAS) \
+ ADD_ROW_15(BASENAME, BIAS) \
+ BASENAME##F += BIAS##F;
+
+/** @} */ // end of group ADD_ROW_n
+
+/** Add the block (BIAS) to another block (BASENAME)
+ * @name ADD_BLOCK
+ *
+ * Supported cases are N=1,2,3,...,16
+ *
+ * @param[in] N The number of vectors in the block
+ * @param[in] BASENAME The basename of the destination variables
+ * @param[in] BIAS The basename of the added variables
+ * @{
+ */
+#define ADD_BLOCK_STR(N, BASENAME, BIAS) ADD_ROW_##N(BASENAME, BIAS)
+#define ADD_BLOCK(N, BASENAME, BIAS) ADD_BLOCK_STR(N, BASENAME, BIAS)
+/** @} */ // end of group ADD_BLOCK
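+
+// Illustrative sketch (not part of the original helpers): ADD_BLOCK(2, c, bias) expands to
+//   c0 += bias0;
+//   c1 += bias1;
+// where c0/c1 and bias0/bias1 are vectors of matching width declared by the caller.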
+
+/** Broadcast (add a single value) to each element of the destination variables
+ * @name ADD_ROW_BROADCAST_n
+ *
+ * @param[in] BASENAME The basename of the destination variables
+ * @param[in] BIAS The variable containing the value to add
+ * @{
+ */
+#define ADD_ROW_BROADCAST_1(BASENAME, BIAS) BASENAME##0 += BIAS;
+
+#define ADD_ROW_BROADCAST_2(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_1(BASENAME, BIAS) \
+ BASENAME##1 += BIAS;
+
+#define ADD_ROW_BROADCAST_3(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_2(BASENAME, BIAS) \
+ BASENAME##2 += BIAS;
+
+#define ADD_ROW_BROADCAST_4(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_3(BASENAME, BIAS) \
+ BASENAME##3 += BIAS;
+
+#define ADD_ROW_BROADCAST_5(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_4(BASENAME, BIAS) \
+ BASENAME##4 += BIAS;
+
+#define ADD_ROW_BROADCAST_6(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_5(BASENAME, BIAS) \
+ BASENAME##5 += BIAS;
+
+#define ADD_ROW_BROADCAST_7(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_6(BASENAME, BIAS) \
+ BASENAME##6 += BIAS;
+
+#define ADD_ROW_BROADCAST_8(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_7(BASENAME, BIAS) \
+ BASENAME##7 += BIAS;
+
+#define ADD_ROW_BROADCAST_9(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_8(BASENAME, BIAS) \
+ BASENAME##8 += BIAS;
+
+#define ADD_ROW_BROADCAST_10(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_9(BASENAME, BIAS) \
+ BASENAME##9 += BIAS;
+
+#define ADD_ROW_BROADCAST_11(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_10(BASENAME, BIAS) \
+ BASENAME##A += BIAS;
+
+#define ADD_ROW_BROADCAST_12(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_11(BASENAME, BIAS) \
+ BASENAME##B += BIAS;
+
+#define ADD_ROW_BROADCAST_13(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_12(BASENAME, BIAS) \
+ BASENAME##C += BIAS;
+
+#define ADD_ROW_BROADCAST_14(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_13(BASENAME, BIAS) \
+ BASENAME##D += BIAS;
+
+#define ADD_ROW_BROADCAST_15(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_14(BASENAME, BIAS) \
+ BASENAME##E += BIAS;
+
+#define ADD_ROW_BROADCAST_16(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_15(BASENAME, BIAS) \
+ BASENAME##F += BIAS;
+/** @} */ // end of group ADD_ROW_BROADCAST_n
+
+/** Broadcast (add a value) to each element of the destination block (BASENAME)
+ * @name ADD_BLOCK_BROADCAST
+ *
+ * Supported cases are N=1,2,3,...,16.
+ *
+ * @param[in] N The number of vectors in the block
+ * @param[in] BASENAME The basename of the destination variables
+ * @param[in] BIAS The variable containing the value to add
+ * @{
+ */
+#define ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS) ADD_ROW_BROADCAST_##N(BASENAME, BIAS)
+#define ADD_BLOCK_BROADCAST(N, BASENAME, BIAS) ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS)
+/** @} */ // end of group ADD_BLOCK_BROADCAST
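+
+// Illustrative sketch (not part of the original helpers): ADD_BLOCK_BROADCAST(2, c, bias_s)
+// expands to
+//   c0 += bias_s;
+//   c1 += bias_s;
+// i.e. the same value bias_s (a hypothetical name) is added to every row of the block.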
+
+/** Apply activation to the given variables
+ * @name ACTIVATION_ROW_n
+ *
+ * @param[in] ACTIVATION_TYPE The type of the activation
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] A_VAL Additional value required by the activation
+ * @param[in] B_VAL Additional value required by the activation
+ * @{
+ */
+#define ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##0 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##0, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##1 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##1, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##2 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##2, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##3 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##3, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##4 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##4, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##5 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##5, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##6 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##6, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##7 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##7, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##8 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##8, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##9 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##9, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##A = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##A, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##B = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##B, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##C = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##C, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##D = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##D, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##E = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##E, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_16(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##F = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##F, A_VAL, B_VAL);
+/** @} */ // end of group ACTIVATION_ROW_n
+
+/** Apply activation to a block (BASENAME)
+ * @name ACTIVATION_BLOCK
+ *
+ * Supported cases are N=1,2,3,...,16.
+ *
+ * @param[in] N The number of vectors in the block
+ * @param[in] ACTIVATION_TYPE The type of the activation
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] A_VAL Additional value required by the activation
+ * @param[in] B_VAL Additional value required by the activation
+ * @{
+ */
+#define ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_##N(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
+#define ACTIVATION_BLOCK(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
+/** @} */ // end of group ACTIVATION_BLOCK
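+
+// Illustrative sketch (not part of the original helpers):
+//   ACTIVATION_BLOCK(2, RELU, float, c, A_VAL, B_VAL)
+// expands to
+//   c0 = ACTIVATION(RELU, float, c0, A_VAL, B_VAL);
+//   c1 = ACTIVATION(RELU, float, c1, A_VAL, B_VAL);
+// with ACTIVATION itself coming from activation_float_helpers.h; RELU, A_VAL and B_VAL are
+// placeholders for whatever the caller passes.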
+
+/** Apply convert_<data_type> to the given variables
+ * @name CONVERT_ROW_n
+ *
+ * @param[in] N The size of the vectors
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME_SRC The basename of the source variables
+ * @param[in] BASENAME_DST The basename of the destination variables
+ */
+#define CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##0 = CONVERT(BASENAME_SRC##0, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##1 = CONVERT(BASENAME_SRC##1, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##2 = CONVERT(BASENAME_SRC##2, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##3 = CONVERT(BASENAME_SRC##3, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##4 = CONVERT(BASENAME_SRC##4, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##5 = CONVERT(BASENAME_SRC##5, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##6 = CONVERT(BASENAME_SRC##6, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##7 = CONVERT(BASENAME_SRC##7, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##8 = CONVERT(BASENAME_SRC##8, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##9 = CONVERT(BASENAME_SRC##9, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##A = CONVERT(BASENAME_SRC##A, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##B = CONVERT(BASENAME_SRC##B, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##C = CONVERT(BASENAME_SRC##C, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##D = CONVERT(BASENAME_SRC##D, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##E = CONVERT(BASENAME_SRC##E, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_16(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##F = CONVERT(BASENAME_SRC##F, VEC_DATA_TYPE(DATA_TYPE, N));
+/** @} */ // end of group CONVERT_ROW_n
+
+/** Apply convert_<data_type> to a block (BASENAME_SRC) and save to another block (BASENAME_DST)
+ * @name CONVERT_BLOCK
+ *
+ * Supported cases are N=1,2,3,...,16.
+ *
+ * @param[in] M The number of vectors to convert
+ * @param[in] N The size of the vectors
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME_SRC The basename of the source variables
+ * @param[in] BASENAME_DST The basename of the destination variables
+ */
+#define CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_##M(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
+#define CONVERT_BLOCK(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
+/** @} */ // end of group CONVERT_BLOCK
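+
+// Illustrative sketch (not part of the original helpers): CONVERT_BLOCK(2, 4, half, c, c_h)
+// expands roughly to
+//   half4 c_h0 = convert_half4(c0);
+//   half4 c_h1 = convert_half4(c1);
+// where c_h is a hypothetical destination basename.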
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "gemm_helpers.h"
+#include "helpers_asymm.h"
+#include "repeat.h"
+
+#if defined(DATA_TYPE) && defined(ACC_DATA_TYPE)
+
+#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && \
+ defined(cl_arm_integer_dot_product_accumulate_int8)
+#define ARM_DOT(x, y, val) val = arm_dot_acc((x), (y), (val));
+#else // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) &&
+ // defined(cl_arm_integer_dot_product_accumulate_int8)
+#define ARM_DOT(x, y, val) val += arm_dot((x), (y));
+#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) &&
+ // defined(cl_arm_integer_dot_product_accumulate_int8)
+#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+
+#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+
+/** Specialized macros to perform the dot product instruction between two vectors of size N [1,16].
+ * These macros use the dot8 instruction */
+#define ARM_DOT1(a, b, c) \
+ ({ \
+ ARM_DOT((VEC_DATA_TYPE(DATA_TYPE, 4))(a, (VEC_DATA_TYPE(DATA_TYPE, 3))0), \
+ (VEC_DATA_TYPE(DATA_TYPE, 4))(b, (VEC_DATA_TYPE(DATA_TYPE, 3))0), c); \
+ })
+#define ARM_DOT2(a, b, c) \
+ ({ \
+ ARM_DOT((VEC_DATA_TYPE(DATA_TYPE, 4))(a, (VEC_DATA_TYPE(DATA_TYPE, 2))0), \
+ (VEC_DATA_TYPE(DATA_TYPE, 4))(b, (VEC_DATA_TYPE(DATA_TYPE, 2))0), c); \
+ })
+#define ARM_DOT3(a, b, c) \
+ ({ \
+ ARM_DOT((VEC_DATA_TYPE(DATA_TYPE, 4))(a, (DATA_TYPE)0), \
+ (VEC_DATA_TYPE(DATA_TYPE, 4))(b, (DATA_TYPE)0), c); \
+ })
+#define ARM_DOT4(a, b, c) ({ ARM_DOT(a, b, c); })
+#define ARM_DOT8(a, b, c) \
+ ({ \
+ ARM_DOT4((a.lo), (b.lo), c); \
+ ARM_DOT4((a.hi), (b.hi), c); \
+ })
+#define ARM_DOT16(a, b, c) \
+ ({ \
+ ARM_DOT8((a.lo), (b.lo), c); \
+ ARM_DOT8((a.hi), (b.hi), c); \
+ })
+
+#else // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+
+/** Specialized macros to perform the dot product instruction between two vectors of size K0 [1,16]
+ * without using the dot8 instruction. */
+#define ARM_DOT1(a, b, c) ({ c += (ACC_DATA_TYPE)a * b; })
+#define ARM_DOT2(a, b, c) \
+ ({ \
+ c += (ACC_DATA_TYPE)a.s0 * b.s0; \
+ c += (ACC_DATA_TYPE)a.s1 * b.s1; \
+ })
+#define ARM_DOT3(a, b, c) \
+ ({ \
+ ARM_DOT2(a, b, c); \
+ c += (ACC_DATA_TYPE)a.s2 * b.s2; \
+ })
+#define ARM_DOT4(a, b, c) \
+ ({ \
+ ARM_DOT3(a, b, c); \
+ c += (ACC_DATA_TYPE)a.s3 * b.s3; \
+ })
+#define ARM_DOT8(a, b, c) \
+ ({ \
+ ARM_DOT4((a.lo), (b.lo), c); \
+ ARM_DOT4((a.hi), (b.hi), c); \
+ })
+#define ARM_DOT16(a, b, c) \
+ ({ \
+ ARM_DOT8((a.lo), (b.lo), c); \
+ ARM_DOT8((a.hi), (b.hi), c); \
+ })
+#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
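+
+// Illustrative sketch (not part of the original kernel source): in the fallback path above
+// (no dot8 extension), ARM_DOT4(a, b, c) with e.g. char4 operands and ACC_DATA_TYPE=int
+// accumulates
+//   c += (int)a.s0 * b.s0;
+//   c += (int)a.s1 * b.s1;
+//   c += (int)a.s2 * b.s2;
+//   c += (int)a.s3 * b.s3;
+// while the dot8 path folds the same computation into arm_dot()/arm_dot_acc().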
+
+/** Specialized macros to perform a broadcast dot product operation between one vector "a" and N0
+ * vectors "b" of size K0 [1,16] */
+#define ARM_DOT_K0X1(k0, a, b, c) ({ ARM_DOT_K0(k0, (a), (b##0), (c)); })
+#define ARM_DOT_K0X2(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0(k0, (a), (b##0), (c.s0)); \
+ ARM_DOT_K0(k0, (a), (b##1), (c.s1)); \
+ })
+#define ARM_DOT_K0X3(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0X2(k0, a, b, c); \
+ ARM_DOT_K0(k0, (a), (b##2), (c.s2)); \
+ })
+#define ARM_DOT_K0X4(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0X3(k0, a, b, c); \
+ ARM_DOT_K0(k0, (a), (b##3), (c.s3)); \
+ })
+#define ARM_DOT_K0X8(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0X4(k0, a, b, c); \
+ ARM_DOT_K0(k0, (a), (b##4), (c.s4)); \
+ ARM_DOT_K0(k0, (a), (b##5), (c.s5)); \
+ ARM_DOT_K0(k0, (a), (b##6), (c.s6)); \
+ ARM_DOT_K0(k0, (a), (b##7), (c.s7)); \
+ })
+#define ARM_DOT_K0X16(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0X8(k0, a, b, c); \
+ ARM_DOT_K0(k0, (a), (b##8), (c.s8)); \
+ ARM_DOT_K0(k0, (a), (b##9), (c.s9)); \
+ ARM_DOT_K0(k0, (a), (b##A), (c.sA)); \
+ ARM_DOT_K0(k0, (a), (b##B), (c.sB)); \
+ ARM_DOT_K0(k0, (a), (b##C), (c.sC)); \
+ ARM_DOT_K0(k0, (a), (b##D), (c.sD)); \
+ ARM_DOT_K0(k0, (a), (b##E), (c.sE)); \
+ ARM_DOT_K0(k0, (a), (b##F), (c.sF)); \
+ })
+
+/** Specialized macros to perform a partial matrix multiplication with dimensions M0,N0,K0 */
+#define ARM_MM_K0XN0X1(n0, k0, a, b, c) ({ ARM_DOT_K0XN0(n0, k0, (a##0), b, (c##0)); })
+#define ARM_MM_K0XN0X2(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X1(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##1), b, (c##1)); \
+ })
+#define ARM_MM_K0XN0X3(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X2(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##2), b, (c##2)); \
+ })
+#define ARM_MM_K0XN0X4(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X3(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##3), b, (c##3)); \
+ })
+#define ARM_MM_K0XN0X5(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X4(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##4), b, (c##4)); \
+ })
+#define ARM_MM_K0XN0X6(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X5(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##5), b, (c##5)); \
+ })
+#define ARM_MM_K0XN0X7(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X6(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##6), b, (c##6)); \
+ })
+#define ARM_MM_K0XN0X8(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X7(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##7), b, (c##7)); \
+ })
+
+#define ARM_DOT_K0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b), (c)); \
+ })
+
+#define ARM_DOT_K0XN0(n0, k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT_K0X, n0) \
+ (k0, (a), b, (c)); \
+ })
+
+#define ARM_MM_K0XN0XM0(m0, n0, k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_MM_K0XN0X, m0) \
+ (n0, k0, a, b, c); \
+ })
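+
+/* Illustrative expansion (the names a0.., b0.., c0.. refer to the per-block variables created by
+ * LOAD_BLOCK / REPEAT_VAR_INIT_TO_CONST in the kernels below): for M0=2, N0=4, K0=8,
+ *
+ *   ARM_MM_K0XN0XM0(2, 4, 8, a, b, c);
+ *
+ * expands via CONCAT to ARM_DOT8(a0, b0, c0.s0) ... ARM_DOT8(a0, b3, c0.s3) followed by
+ * ARM_DOT8(a1, b0, c1.s0) ... ARM_DOT8(a1, b3, c1.s3), i.e. every row block a<m> is dotted
+ * with every column block b<n> and accumulated into c<m>.s<n>. */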
+
+/** Specialized macros to perform a broadcast multiplication between one vector "a" of size K0
+ * [1,16] and K0 vectors "b" of size N0, accumulating into a vector "c" of size N0 */
+#define ARM_MUL_N0X1(VECTOR_ACC_TYPE, a, b, c) ({ c += CONVERT(b##0, VECTOR_ACC_TYPE) * a; })
+#define ARM_MUL_N0X2(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ c += CONVERT(b##0, VECTOR_ACC_TYPE) * a.s##0; \
+ c += CONVERT(b##1, VECTOR_ACC_TYPE) * a.s##1; \
+ })
+#define ARM_MUL_N0X3(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ ARM_MUL_N0X2(VECTOR_ACC_TYPE, a, b, c); \
+ c += CONVERT(b##2, VECTOR_ACC_TYPE) * a.s##2; \
+ })
+#define ARM_MUL_N0X4(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ ARM_MUL_N0X3(VECTOR_ACC_TYPE, a, b, c); \
+ c += CONVERT(b##3, VECTOR_ACC_TYPE) * a.s##3; \
+ })
+#define ARM_MUL_N0X8(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ ARM_MUL_N0X4(VECTOR_ACC_TYPE, a, b, c); \
+ c += CONVERT(b##4, VECTOR_ACC_TYPE) * a.s##4; \
+ c += CONVERT(b##5, VECTOR_ACC_TYPE) * a.s##5; \
+ c += CONVERT(b##6, VECTOR_ACC_TYPE) * a.s##6; \
+ c += CONVERT(b##7, VECTOR_ACC_TYPE) * a.s##7; \
+ })
+#define ARM_MUL_N0X16(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ ARM_MUL_N0X8(VECTOR_ACC_TYPE, a, b, c); \
+ c += CONVERT(b##8, VECTOR_ACC_TYPE) * a.s##8; \
+ c += CONVERT(b##9, VECTOR_ACC_TYPE) * a.s##9; \
+ c += CONVERT(b##A, VECTOR_ACC_TYPE) * a.s##A; \
+ c += CONVERT(b##B, VECTOR_ACC_TYPE) * a.s##B; \
+ c += CONVERT(b##C, VECTOR_ACC_TYPE) * a.s##C; \
+ c += CONVERT(b##D, VECTOR_ACC_TYPE) * a.s##D; \
+ c += CONVERT(b##E, VECTOR_ACC_TYPE) * a.s##E; \
+ c += CONVERT(b##F, VECTOR_ACC_TYPE) * a.s##F; \
+ })
+/** Specialized macros to perform a partial matrix multiplication with dimensions M0,N0,K0 */
+#define ARM_MM_NATIVE_N0XK0X1(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##0), b, (c##0)); })
+#define ARM_MM_NATIVE_N0XK0X2(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X1(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##1), b, (c##1)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X3(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X2(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##2), b, (c##2)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X4(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X3(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##3), b, (c##3)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X5(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X4(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##4), b, (c##4)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X6(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X5(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##5), b, (c##5)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X7(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X6(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##6), b, (c##6)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X8(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X7(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##7), b, (c##7)); \
+ })
+#define ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_MUL_N0X, k0) \
+ (VECTOR_ACC_TYPE, (a), b, (c)); \
+ })
+#define ARM_MM_NATIVE_N0XK0XM0(VECTOR_ACC_TYPE, m0, k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_MM_NATIVE_N0XK0X, m0) \
+ (VECTOR_ACC_TYPE, k0, a, b, c); \
+ })
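+
+/* Illustrative expansion (values chosen for the example only; T denotes the given vector
+ * accumulator type): for M0=2 and K0=4,
+ *
+ *   ARM_MM_NATIVE_N0XK0XM0(VEC_DATA_TYPE(ACC_DATA_TYPE, N0), 2, 4, a, b, c);
+ *
+ * expands to c0 += CONVERT(b0, T) * a0.s0; ... c0 += CONVERT(b3, T) * a0.s3; and the same chain
+ * for a1/c1, where each b<k> holds N0 RHS values and the scalar a<m>.s<k> is broadcast across it. */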
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && defined(M) && \
+ defined(N)
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices with
+ * QASYMM8/QASYMM8_SIGNED data type. The LHS matrix must be reshaped with @ref
+ * CLGEMMReshapeLHSMatrixKernel and the M0xK0 block must NOT be transposed. The RHS matrix must be
+ * reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the K0xN0 block must be transposed.
+ *
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The accumulator data type must be passed at compile time using -DACC_DATA_TYPE (i.e.
+ * -DACC_DATA_TYPE=uint)
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions M and N must be passed at compile time using -DM and -DN (i.e. -DM=52
+ * and -DN=90).
+ * @note The block's dimensions used for reshaping the LHS matrix and the RHS matrix (M0, N0 and K0)
+ * must be passed at compile time using -DM0, -DN0 and -DK0 (i.e. -DM0=4, -DN0=8, -DK0=4).
+ * @note The number of M0xK0 vertical blocks stored on the same output row of the reshaped LHS
+ * matrix must be passed at compile time using -DV0 (i.e. -DV0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (i.e. -DH0=2)
+ * @note If the M0xK0 blocks in the reshaped LHS matrix have been interleaved, the option
+ * -DLHS_INTERLEAVE must be passed at compile time.
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - V0 >= 1
+ * - H0 >= 1
+ *
+ * @note In case the output has to be reinterpreted as a 3D tensor (i.e. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix NOT reshaped
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data
+ * type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS reshaped
+ * matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS reshaped
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] k Number of columns in LHS matrix and rows in RHS
+ * matrix not reshaped.
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemmlowp_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+ IMAGE_DECLARATION(dst), uint k, uint lhs_stride_z,
+ uint rhs_stride_z, uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define LHS_BLOCK_SIZE ((K0) * (M0))
+
+#if defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (K0)
+#define LHS_STEP_X ((K0) * (V0))
+#define LHS_STEP_LOOP (1)
+#else // defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (LHS_BLOCK_SIZE)
+#define LHS_STEP_X (K0)
+#define LHS_STEP_LOOP (V0)
+#endif // defined(LHS_INTERLEAVE)
+
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ __global DATA_TYPE *lhs_addr = lhs_ptr + lhs_offset_first_element_in_bytes +
+ (y % V0) * (uint)LHS_OFFSET_X + (y / V0) * (uint)lhs_stride_y +
+ (z * lhs_stride_z);
+
+ // Compute RHS matrix address
+ __global DATA_TYPE *rhs_addr = rhs_ptr + rhs_offset_first_element_in_bytes +
+ (x % H0) * (uint)RHS_OFFSET_X + (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_addr += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_addr += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zrhs, 0);
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(ACC_DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(ACC_DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ for (int i = 0; i < k; i += K0)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_addr, 0, LHS_STEP_X, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_addr, 0, RHS_STEP_X, zrhs);
+
+ // Partial matrix multiplication M0,N0,K0
+ ARM_MM_K0XN0XM0(M0, N0, K0, a, b, c);
+
+ // Update address
+ lhs_addr += (M0 * LHS_STEP_X * LHS_STEP_LOOP);
+ rhs_addr += (N0 * RHS_STEP_X * RHS_STEP_LOOP);
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(int)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Convert and store output block
+ CONVERT_STORE_BLOCK(M0, N0, int, c, dst_addr, dst_stride_y, zout);
+
+#undef LHS_BLOCK_SIZE
+#undef LHS_OFFSET_X
+#undef LHS_STEP_X
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
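+
+/* Example build options for the kernel above (illustrative values only; any configuration allowed
+ * by the notes in the documentation is valid):
+ *   -DDATA_TYPE=uchar -DACC_DATA_TYPE=uint -DM=64 -DN=64
+ *   -DM0=4 -DN0=4 -DK0=16 -DV0=2 -DH0=2 -DLHS_INTERLEAVE -DRHS_INTERLEAVE */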
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && defined(M) &&
+       // defined(N)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(H0) && defined(K)
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped
+ * The RHS matrix is reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the block K0xN0 is
+ * transposed
+ *
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The accumulator data type must be passed at compile time using -DACC_DATA_TYPE (i.e.
+ * -DACC_DATA_TYPE=uint)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (i.e. -DK=64)
+ * @note The block's dimensions used for reshaping the RHS matrix (N0 and K0) must be passed at
+ * compile time using -DN0 and -DK0 (i.e. -DN0=8, -DK0=4).
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (i.e. -DM0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (i.e. -DH0=2)
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - H0 >= 1
+ *
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data
+ * type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS reshaped
+ * matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS reshaped
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix in
+ * unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemmlowp_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+ IMAGE_DECLARATION(dst), uint lhs_stride_z,
+ uint rhs_stride_z, uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes + (x % H0) * (uint)RHS_OFFSET_X +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zrhs, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // The plane (zlhs) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(ACC_DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(ACC_DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ for (int i = 0; i < K; i += K0)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_ptr, rhs_offset, RHS_STEP_X, zrhs);
+
+ // Partial matrix multiplication M0,N0,K0
+ ARM_MM_K0XN0XM0(M0, N0, K0, a, b, c);
+
+ lhs_offset += K0;
+ rhs_offset += N0 * RHS_STEP_X * RHS_STEP_LOOP;
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0) * sizeof(int) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Convert and store output block
+ CONVERT_STORE_BLOCK(M0, N0, int, c, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
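+
+/* Example build options for the kernel above (illustrative values only):
+ *   -DDATA_TYPE=uchar -DACC_DATA_TYPE=uint -DK=128 -DM0=4 -DN0=8 -DK0=4 -DH0=2 -DRHS_INTERLEAVE
+ * When dummy work-items are dispatched, -DDUMMY_WORK_ITEMS together with -DM and -DN must be
+ * added as well. */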
+
+#if defined(RESULT_OFFSET) && defined(RESULT_SHIFT) && defined(RESULT_MULTIPLIER)
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices with fused output stage
+ * using fixed-point arithmetic. The LHS matrix is NOT reshaped. The RHS matrix is reshaped with
+ * @ref CLGEMMReshapeRHSMatrixKernel and the block K0xN0 is transposed.
+ *
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The accumulator data type must be passed at compile time using -DACC_DATA_TYPE (i.e.
+ * -DACC_DATA_TYPE=uint)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (i.e. -DK=64)
+ * @note The block's dimensions used for reshaping the RHS matrix (N0 and K0) must be passed at
+ * compile time using -DN0 and -DK0 (i.e. -DN0=8, -DK0=4).
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (i.e. -DM0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (i.e. -DH0=2)
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - H0 >= 1
+ *
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @note The offset, scalar scale factor and number of bits to shift right of output tensor must be
+ * passed at compile time using -DRESULT_OFFSET, -DRESULT_MULTIPLIER and -DRESULT_SHIFT
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ * @note In case of per-channel quantization of matrix B, -DPER_CHANNEL_QUANTIZATION must be passed
+ * at compile time.
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix.
+ * Supported data type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in
+ * X dimension (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in
+ * Y dimension (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in
+ * the LHS reshaped matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix.
+ * Supported data type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in
+ * X dimension (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in
+ * Y dimension (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in
+ * the RHS reshaped matrix
+ * @param[out] dst_ptr Pointer to the destination matrix
+ * Supported data type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in
+ * X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in
+ * Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in
+ * the destination matrix
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in
+ * Z dimension (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in
+ * Z dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in
+ * Z dimension (in bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS
+ * matrix in unit of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the
+ * output matrix in unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ * @param[in] sum_col_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: S32
+ * @param[in] sum_col_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: S32
+ * @param[in] sum_row_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases
+ * tensor. Supported data type: S32
+ * @param[in] biases_stride_x (Optional) Stride of the biases
+ * tensor in X dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the biases tensor
+ * @param[in] result_multipliers_ptr (Optional) Pointer to the output
+ * multipliers vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_multipliers_stride_x (Optional) Stride of the output
+ * multipliers vector in X dimension (in bytes)
+ * @param[in] result_multipliers_step_x (Optional)
+ * output_multipliers_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_multipliers_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output multipliers vector
+ * @param[in] result_shifts_ptr (Optional) Pointer to the output
+ * shifts vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_shifts_stride_x (Optional) Stride of the output
+ * shifts vector in X dimension (in bytes)
+ * @param[in] result_shifts_step_x (Optional) output_shifts_stride_x *
+ * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_shifts_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output shifts vector
+ */
+__kernel void gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint(
+ IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs), IMAGE_DECLARATION(dst), uint lhs_stride_z,
+ uint rhs_stride_z, uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ VECTOR_DECLARATION(biases)
+#endif // defined(ADD_BIAS)
+#if defined(PER_CHANNEL_QUANTIZATION)
+ ,
+ VECTOR_DECLARATION(result_multipliers), VECTOR_DECLARATION(result_shifts)
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes + (x % H0) * (uint)RHS_OFFSET_X +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zrhs, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // The plane (zlhs) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(ACC_DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(ACC_DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ for (int i = 0; i < K; i += K0)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_ptr, rhs_offset, RHS_STEP_X, zrhs);
+
+ // Partial matrix multiplication M0,N0,K0
+ ARM_MM_K0XN0XM0(M0, N0, K0, a, b, c);
+
+ lhs_offset += K0;
+ rhs_offset += N0 * RHS_STEP_X * RHS_STEP_LOOP;
+ }
+
+ // The final result (after the fused output stage) is stored as DATA_TYPE elements
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0) * sizeof(DATA_TYPE) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Convert result of matrix multiplication to S32
+ REPEAT_VAR_INIT_CONVERT_SAT(M0, VEC_DATA_TYPE(int, N0), c, c_int);
+
+ int batch_id = z;
+#if defined(DEPTH_GEMM3D)
+ batch_id /= (int)DEPTH_GEMM3D;
+#endif // defined(DEPTH_GEMM3D)
+
+ // Offset contribution: c += (A_OFFSET * sum_col) + (B_OFFSET * sum_row) + K_OFFSET;
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(int, N0), offset_s32_, K_OFFSET);
+
+#if defined(A_OFFSET)
+ // Compute the offset contribution due to A_OFFSET
+ __global uchar *sum_col_addr =
+ sum_col_ptr + sum_col_offset_first_element_in_bytes + (x * (uint)N0) * sizeof(int);
+
+#if defined(SUM_COL_HAS_BATCHES)
+ sum_col_addr += z * sum_col_stride_y;
+#endif // defined(SUM_COL_HAS_BATCHES)
+ VEC_DATA_TYPE(int, N0)
+ a_offset_s32 = VLOAD(N0)(0, (__global int *)sum_col_addr);
+ a_offset_s32 *= (VEC_DATA_TYPE(int, N0))A_OFFSET;
+
+ REPEAT_ADD_VECTOR_TO_VAR(M0, offset_s32_, a_offset_s32);
+#endif // defined(A_OFFSET)
+
+#if defined(B_OFFSET)
+ // Compute the offset contribution due to B_OFFSET
+ __global uchar *sum_row_addr = sum_row_ptr + sum_row_offset_first_element_in_bytes +
+ (y * (uint)M0) * sizeof(int) + z * sum_row_stride_y;
+
+#if defined(HEIGHT_GEMM3D) && defined(DEPTH_GEMM3D)
+ sum_row_addr += (batch_id % (int)DEPTH_GEMM3D) * (int)HEIGHT_GEMM3D * sizeof(int);
+#endif // defined(HEIGHT_GEMM3D) && defined(DEPTH_GEMM3D)
+ LOAD_SCALAR_AS_VECTOR(M0, N0, int, b_offset_s32_, sum_row_addr, 0, sum_row_stride_x);
+
+ REPEAT_MLA_VAR_WITH_CONST_VEC(M0, offset_s32_, b_offset_s32_, (VEC_DATA_TYPE(int, N0))B_OFFSET);
+#endif // defined(B_OFFSET)
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr =
+ biases_ptr + biases_offset_first_element_in_bytes + (x * (uint)N0) * sizeof(int);
+
+ VEC_DATA_TYPE(int, N0)
+ bias_values = VLOAD(N0)(0, (__global int *)bias_addr);
+ REPEAT_ADD_VECTOR_TO_VAR(M0, offset_s32_, bias_values);
+#endif // defined(ADD_BIAS)
+
+ REPEAT_ADD_TWO_VARS(M0, c_int, offset_s32_);
+
+ // Multiply by result_mult_int and shift
+#if defined(PER_CHANNEL_QUANTIZATION)
+ __global uchar *result_multipliers_addr = result_multipliers_ptr +
+ result_multipliers_offset_first_element_in_bytes +
+ (x * (uint)N0) * sizeof(int);
+ __global uchar *result_shifts_addr =
+ result_shifts_ptr + result_shifts_offset_first_element_in_bytes + (x * (uint)N0) * sizeof(int);
+
+ VEC_DATA_TYPE(int, N0)
+ res_mul = VLOAD(N0)(0, (__global int *)result_multipliers_addr);
+ VEC_DATA_TYPE(int, N0)
+ res_shift = VLOAD(N0)(0, (__global int *)result_shifts_addr);
+
+ REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL(M0, N0, c_int, res_mul, res_shift);
+#else // defined(PER_CHANNEL_QUANTIZATION)
+
+#if RESULT_SHIFT < 0
+ REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(M0, N0, c_int, RESULT_MULTIPLIER,
+ RESULT_SHIFT);
+#else // RESULT_SHIFT >= 0
+ REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(M0, N0, c_int, RESULT_MULTIPLIER,
+ RESULT_SHIFT);
+#endif // RESULT_SHIFT < 0
+
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+
+ // Add the offset terms to GEMM's result
+ REPEAT_ADD_CONST_TO_VAR(M0, VEC_DATA_TYPE(int, N0), c_int, RESULT_OFFSET);
+
+#if defined(MIN_BOUND)
+ REPEAT_MAX_CONST_VAR(M0, VEC_DATA_TYPE(int, N0), c_int, MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ REPEAT_MIN_CONST_VAR(M0, VEC_DATA_TYPE(int, N0), c_int, MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Convert (with saturation) and store the output block
+ CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, c_int, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
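+
+/* Schematic summary of the fused output stage above, per accumulator lane and ignoring the
+ * optional per-channel path (requantize() is a placeholder name for the
+ * REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_* helpers):
+ *
+ *   acc = c_int + K_OFFSET + A_OFFSET * sum_col[x] + B_OFFSET * sum_row[y] + bias[x];
+ *   acc = requantize(acc, RESULT_MULTIPLIER, RESULT_SHIFT) + RESULT_OFFSET;
+ *   out = convert_saturate(clamp(acc, MIN_BOUND, MAX_BOUND));
+ */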
+#endif // defined(RESULT_OFFSET) && defined(RESULT_SHIFT) && defined(RESULT_MULTIPLIER)
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(H0) && defined(K)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(K)
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped
+ * The RHS matrix is NOT reshaped
+ *
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The accumulator data type must be passed at compile time using -DACC_DATA_TYPE (i.e.
+ * -DACC_DATA_TYPE=uint)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (i.e. -DK=64)
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (i.e. -DM0=2)
+ * @note The number of N0 columns to process must be passed at compile time using -DN0 (i.e. -DN0=2)
+ * @note The number of K0 partial accumulations must be passed at compile time using -DK0 (i.e.,
+ * -DK0=2)
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ *
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data
+ * type: QASYMM8
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS reshaped
+ * matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS reshaped
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix in
+ * unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemmlowp_mm_native(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+ IMAGE_DECLARATION(dst), uint lhs_stride_z, uint rhs_stride_z,
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes + x * N0;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0);
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zrhs, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // The plane (zlhs) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(ACC_DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(ACC_DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ int i = 0;
+
+ for (; i <= (K - K0); i += K0)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(K0, N0, DATA_TYPE, b, rhs_ptr, rhs_offset, rhs_stride_y, zrhs);
+
+ // Partial matrix multiplication M0,N0,K0
+#if (GPU_ARCH == GPU_ARCH_MIDGARD)
+ ARM_MM_NATIVE_N0XK0XM0(VEC_DATA_TYPE(ACC_DATA_TYPE, N0), M0, K0, a, b, c);
+#else // GPU_ARCH == GPU_ARCH_MIDGARD
+ // Transpose the values from RHS matrix
+ TRANSPOSE_K0XN0(K0, N0, b_t, b, DATA_TYPE);
+
+ ARM_MM_K0XN0XM0(M0, N0, K0, a, b_t, c);
+#endif // GPU_ARCH == GPU_ARCH_MIDGARD
+
+ // Update the offset
+ lhs_offset += K0;
+ rhs_offset += K0 * rhs_stride_y;
+ }
+
+ // Left-over for loop
+ for (; i < K; ++i)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, 1, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(1, N0, DATA_TYPE, b, rhs_ptr, rhs_offset, rhs_stride_y, zrhs);
+
+ // Partial matrix multiplication M0,N0,1
+#if (GPU_ARCH == GPU_ARCH_MIDGARD)
+ ARM_MM_NATIVE_N0XK0XM0(VEC_DATA_TYPE(ACC_DATA_TYPE, N0), M0, 1, a, b, c);
+#else // GPU_ARCH == GPU_ARCH_MIDGARD
+ // Transpose the values from RHS matrix
+ TRANSPOSE_K0XN0(1, N0, b_t, b, DATA_TYPE);
+
+ ARM_MM_K0XN0XM0(M0, N0, 1, a, b_t, c);
+#endif // GPU_ARCH == GPU_ARCH_MIDGARD
+
+ // Update the offset
+ lhs_offset += 1;
+ rhs_offset += rhs_stride_y;
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0) * sizeof(int) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0); // uint zout0=0,zout1=0,... zout(M0-1)=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Convert and store output block
+ CONVERT_STORE_BLOCK(M0, N0, int, c, dst_addr, dst_stride_y, zout);
+}
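+
+/* Example build options for the native kernel above (illustrative values only):
+ *   -DDATA_TYPE=uchar -DACC_DATA_TYPE=uint -DK=96 -DM0=4 -DN0=4 -DK0=4
+ * GPU_ARCH / GPU_ARCH_MIDGARD are assumed to be provided by the common helpers or build options. */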
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(K)
+
+#if defined(COLS_A)
+/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix
+ * A. It is also possible to multiply each reduced row by a scalar value, if SCALAR is passed at
+ * compile time.
+ *
+ * @note This stage is needed to handle the offset of matrix product
+ * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
+ *
+ * @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE
+ * (i.e. -DACC_DATA_TYPE=uint)
+ * @note In case of scaling the scalar value must be passed at compile time using -DSCALAR (e.g.
+ * -DSCALAR=3)
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data type:
+ * QASYMM8/QASYMM8_SIGNED
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination tensor Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * tensor
+ */
+__kernel void gemmlowp_matrix_a_reduction(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ VEC_DATA_TYPE(ACC_DATA_TYPE, 4)
+ sum_row_32 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))0;
+ ACC_DATA_TYPE sum_row = 0;
+
+ __global const DATA_TYPE *matrix_a =
+ (__global const DATA_TYPE *)(src.ptr + get_global_id(0) * src_stride_y +
+ get_global_id(1) * src_stride_z);
+
+ int i = 0;
+
+ // This for loop performs 16 accumulations
+ for (; i <= ((int)COLS_A - 16); i += 16)
+ {
+ const VEC_DATA_TYPE(DATA_TYPE, 16) a0 = vload16(0, matrix_a + i);
+
+ sum_row_32 += CONVERT(a0.s0123, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) +
+ CONVERT(a0.s4567, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) +
+ CONVERT(a0.s89AB, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) +
+ CONVERT(a0.sCDEF, VEC_DATA_TYPE(ACC_DATA_TYPE, 4));
+ }
+
+ // This for loop performs the leftover accumulations
+ for (; i < COLS_A; ++i)
+ {
+ sum_row += (ACC_DATA_TYPE)matrix_a[i];
+ }
+
+ sum_row += sum_row_32.s0 + sum_row_32.s1 + sum_row_32.s2 + sum_row_32.s3;
+
+#if defined(SCALAR)
+ sum_row *= (int)SCALAR;
+#endif // defined(SCALAR)
+ *((__global int *)dst.ptr) = (int)sum_row;
+}
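+
+/* In other words, for each row y of matrix A the kernel above computes (illustrative notation):
+ *
+ *   sum_row[y] = SCALAR * (A[y][0] + A[y][1] + ... + A[y][COLS_A - 1])
+ *
+ * with the SCALAR factor applied only when -DSCALAR is defined. The result feeds the B_OFFSET
+ * contribution in the offset-correction code below. */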
+
+#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A
+ * using the arm dot product instruction. It is also possible to multiply each reduced row by a
+ * scalar value, if SCALAR is passed at compile time.
+ *
+ * @note This stage is needed to handle the offset of matrix product
+ * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
+ *
+ * @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE
+ * (i.e. -DACC_DATA_TYPE=uint)
+ * @note In case of scaling the scalar value must be passed at compile time using -DSCALAR (e.g.
+ * -DSCALAR=3)
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data type:
+ * QASYMM8/QASYMM8_SIGNED
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination tensor Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * tensor
+ */
+__kernel void gemmlowp_matrix_a_reduction_dot8(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ ACC_DATA_TYPE sum_row = 0;
+
+ __global const DATA_TYPE *matrix_a =
+ (__global const DATA_TYPE *)(src.ptr + get_global_id(0) * src_stride_y +
+ get_global_id(1) * src_stride_z);
+
+ int i = 0;
+
+ // This for loop performs 32 accumulations per iteration
+ for (; i <= ((int)COLS_A - 32); i += 32)
+ {
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ a0 = vload16(0, matrix_a + i);
+
+ sum_row += arm_dot(a0.s0123, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.s4567, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.s89AB, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.sCDEF, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+
+ a0 = vload16(1, matrix_a + i);
+
+ sum_row += arm_dot(a0.s0123, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.s4567, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.s89AB, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.sCDEF, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ }
+
+ // This for loop performs the leftover accumulations
+ for (; i < COLS_A; ++i)
+ {
+ sum_row += (ACC_DATA_TYPE)matrix_a[i];
+ }
+
+#if defined(SCALAR)
+ sum_row *= (int)SCALAR;
+#endif // defined(SCALAR)
+ *((__global int *)dst.ptr) = (int)sum_row;
+}
+#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+#endif // defined(COLS_A)
+
+#if defined(COLS_B) && defined(ROWS_B)
+/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each column of
+ * Matrix B. It is also possible to multiply each reduced column by a scalar value, if SCALAR is
+ * passed at compile time.
+ *
+ * @note This stage is needed to handle the offset of matrix product
+ * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
+ *
+ * @attention The number of matrix B columns and rows needs to be passed at compile time using
+ * -DCOLS_B and -DROWS_B
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE
+ * (i.e. -DACC_DATA_TYPE=uint)
+ * @note In case of scaling the scalar value must be passed at compile time using -DSCALAR (i.e.
+ * -DSCALAR=3)
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data type:
+ * QASYMM8/QASYMM8_SIGNED
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination tensor Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * tensor
+ */
+__kernel void gemmlowp_matrix_b_reduction(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ VEC_DATA_TYPE(ACC_DATA_TYPE, 16)
+ sum_col_32 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 16))0;
+
+ __global const DATA_TYPE *matrix_b =
+ (__global const DATA_TYPE *)(src.ptr + get_global_id(1) * src_stride_z);
+
+ int i = 0;
+ // This for loop performs 4 accumulations
+ for (; i <= ((int)ROWS_B - 4); i += 4)
+ {
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b0 = vload16(0, matrix_b + 0 * src_stride_y);
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b1 = vload16(0, matrix_b + 1 * src_stride_y);
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b2 = vload16(0, matrix_b + 2 * src_stride_y);
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b3 = vload16(0, matrix_b + 3 * src_stride_y);
+
+ sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) +
+ CONVERT(b1, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) +
+ CONVERT(b2, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) +
+ CONVERT(b3, VEC_DATA_TYPE(ACC_DATA_TYPE, 16));
+
+ matrix_b += 4 * src_stride_y;
+ }
+
+ // This for loop performs the leftover accumulations
+ for (; i < (int)ROWS_B; ++i)
+ {
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b0 = vload16(0, matrix_b);
+
+ sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(ACC_DATA_TYPE, 16));
+
+ matrix_b += src_stride_y;
+ }
+
+#if defined(SCALAR)
+ sum_col_32 *= (VEC_DATA_TYPE(ACC_DATA_TYPE, 16))SCALAR;
+#endif // defined(SCALAR)
+ VSTORE(16)
+ (convert_int16(sum_col_32), 0, (__global int *)dst.ptr);
+}
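+
+/* In other words, for each column x of matrix B the kernel above computes (illustrative notation):
+ *
+ *   sum_col[x] = SCALAR * (B[0][x] + B[1][x] + ... + B[ROWS_B - 1][x])
+ *
+ * processing 16 consecutive columns per work-item. The result feeds the A_OFFSET contribution in
+ * the offset-correction code below. */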
+#endif // defined(COLS_B) && defined(ROWS_B)
+
+#endif // defined(DATA_TYPE) && defined(ACC_DATA_TYPE)
+
+#if defined(K_OFFSET)
+
+/** Helper function used to calculate the offset contribution after matrix multiplication.
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication),
+ * and calculates the offset contribution of matrix A and matrix B.
+ *
+ * @attention The k_offset = a_offset * b_offset * k (where k is the number of matrix A columns)
+ * needs to be passed at compile time using -DK_OFFSET (i.e. -DK_OFFSET=1200)
+ * @note In case the offset contribution due to a_offset is required, a_offset needs to be passed at
+ * compile time using -DA_OFFSET (i.e. -DA_OFFSET=1)
+ * @note In case the offset contribution due to b_offset is required, b_offset needs to be passed at
+ * compile time using -DB_OFFSET (i.e. -DB_OFFSET=6)
+ * @note In case sum_col has batches, -DSUM_COL_HAS_BATCHES must be passed at compile time.
+ * Usually, if gemmlowp is used to accelerate a convolution layer, sum_col will not have batches
+ *
+ * @param[in] x get_global_id(0) * 4
+ * @param[in] y get_global_id(1)
+ * @param[in] z get_global_id(2)
+ * @param[in] sum_col_ptr (Optional) Pointer to the source tensor.
+ * Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_col_stride_x (Optional) Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source tensor.
+ * Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_row_stride_x (Optional) Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ */
+inline int4 offset_contribution(int x, int y, int z
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ VECTOR_DECLARATION(biases)
+#endif // defined(ADD_BIAS)
+)
+{
+ int4 a_offset_s32 = (int4)0;
+ int4 b_offset_s32 = (int4)0;
+
+ int batch_id = z;
+#if defined(DEPTH_INPUT3D)
+ batch_id /= (int)DEPTH_INPUT3D;
+#endif // defined(DEPTH_INPUT3D)
+
+#if defined(A_OFFSET)
+  // Compute the address of sum_col, used for the offset contribution due to A_OFFSET
+ __global uchar *sum_col_addr =
+ sum_col_ptr + sum_col_offset_first_element_in_bytes + x * sizeof(int);
+
+ // Compute the offset contribution due to A_OFFSET
+#if defined(SUM_COL_HAS_BATCHES)
+ a_offset_s32 = vload4(0, (__global int *)(sum_col_addr + batch_id * sum_col_stride_y));
+#else // defined(SUM_COL_HAS_BATCHES)
+ a_offset_s32 = vload4(0, (__global int *)sum_col_addr);
+#endif // defined(SUM_COL_HAS_BATCHES)
+
+ a_offset_s32 *= (int4)A_OFFSET;
+#endif // defined(A_OFFSET)
+
+#if defined(B_OFFSET)
+  // Compute the address of sum_row, used for the offset contribution due to B_OFFSET
+ __global uchar *sum_row_addr =
+ sum_row_ptr + sum_row_offset_first_element_in_bytes + y * sizeof(int);
+
+ // Compute the offset contribution due to B_OFFSET
+#if defined(HEIGHT_INPUT3D) && defined(DEPTH_INPUT3D)
+ b_offset_s32 = (int4) * (((__global int *)(sum_row_addr + batch_id * sum_row_stride_y)) +
+ (z % (int)DEPTH_INPUT3D) * (int)HEIGHT_INPUT3D);
+#else // defined(HEIGHT_INPUT3D) && defined(DEPTH_INPUT3D)
+ b_offset_s32 = (int4) * (((__global int *)(sum_row_addr + batch_id * sum_row_stride_y)));
+#endif // defined(HEIGHT_INPUT3D) && defined(DEPTH_INPUT3D)
+ b_offset_s32 *= (int4)B_OFFSET;
+#endif // defined(B_OFFSET)
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ b_offset_s32 += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ return (int4)K_OFFSET + a_offset_s32 + b_offset_s32;
+}
+
+/* OpenCL kernel used to add the offset contribution after matrix multiplication. The computation is
+ * performed in-place
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication),
+ * and adds to it the offset contribution of matrix A and matrix B in-place.
+ *
+ * @attention The k_offset = a_offset * b_offset * k (where k is the number of matrix A columns)
+ * needs to be passed at compile time using -DK_OFFSET (i.e. -DK_OFFSET=1200)
+ * @note In case the offset contribution due to a_offset is required, a_offset needs to be passed at
+ * compile time using -DA_OFFSET (i.e. -DA_OFFSET=1)
+ * @note In case the offset contribution due to b_offset is required, b_offset needs to be passed at
+ * compile time using -DB_OFFSET (i.e. -DB_OFFSET=6)
+ * @note In case sum_col has batches, -DSUM_COL_HAS_BATCHES must be passed at compile time.
+ * Usually, if gemmlowp is used to accelerate a convolution layer, sum_col will not have batches
+ *
+ * The final result is:
+ *
+ * mm_result[i][k] = mm_result[i][k] +
+ * (sum_col[k] * A_OFFSET) +
+ * (sum_row[i] * B_OFFSET) +
+ * (K_OFFSET)
+ *
+ * @param[in] mm_result_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] mm_result_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] mm_result_step_x mm_result_stride_x * number of elements along
+ * X processed per workitem(in bytes)
+ * @param[in] mm_result_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] mm_result_step_y mm_result_stride_y * number of elements along
+ * Y processed per workitem(in bytes)
+ * @param[in] mm_result_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] mm_result_step_z mm_result_stride_z * number of elements along
+ * Z processed per workitem(in bytes)
+ * @param[in] mm_result_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] sum_col_ptr (Optional) Pointer to the source tensor.
+ * Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_col_stride_x (Optional) Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number of
+ * elements along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source tensor.
+ * Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_row_stride_x (Optional) Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number of
+ * elements along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ */
+__kernel void gemmlowp_offset_contribution(TENSOR3D_DECLARATION(mm_result)
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ VECTOR_DECLARATION(biases)
+#endif // defined(ADD_BIAS)
+)
+{
+ const int x = get_global_id(0) * 4;
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ // Compute offset contribution
+ int4 offset_term_s32 = offset_contribution(
+ x, y, z
+#if defined(A_OFFSET)
+ ,
+ sum_col_ptr, sum_col_stride_x, sum_col_step_x, sum_col_stride_y, sum_col_step_y,
+ sum_col_offset_first_element_in_bytes
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ sum_row_ptr, sum_row_stride_x, sum_row_step_x, sum_row_stride_y, sum_row_step_y,
+ sum_row_offset_first_element_in_bytes
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ biases_ptr, biases_stride_x, biases_step_x, biases_offset_first_element_in_bytes
+#endif // defined(ADD_BIAS)
+ );
+
+ __global uchar *mm_result_addr = mm_result_ptr + mm_result_offset_first_element_in_bytes +
+ x * sizeof(int) + y * mm_result_stride_y +
+ z * mm_result_stride_z;
+
+ int4 in_s32 = vload4(0, (__global int *)mm_result_addr);
+
+ // Add the offset terms to GEMM's result
+ in_s32 += offset_term_s32;
+
+ // Store the result with the offset contribution
+ vstore4(in_s32, 0, (__global int *)mm_result_addr);
+}
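+
+/* A host-side C sketch of the in-place correction applied above, for documentation only. The
+ * function name and the dense row-major layout are assumptions; in the usual GEMMLowp setup sum_col
+ * holds the column sums of matrix B, sum_row the row sums of matrix A, and
+ * k_offset = a_offset * b_offset * k. The optional bias add (-DADD_BIAS) is omitted for brevity.
+ *
+ *   #include <stdint.h>
+ *
+ *   void offset_contribution_ref(int32_t *mm_result, const int32_t *sum_col, const int32_t *sum_row,
+ *                                int rows, int cols, int32_t a_offset, int32_t b_offset,
+ *                                int32_t k_offset)
+ *   {
+ *     for (int i = 0; i < rows; ++i)
+ *       for (int x = 0; x < cols; ++x)
+ *         mm_result[i * cols + x] += sum_col[x] * a_offset + sum_row[i] * b_offset + k_offset;
+ *   }
+ */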
+
+#if defined(RESULT_OFFSET) && defined(RESULT_MULTIPLIER) && defined(RESULT_SHIFT) && \
+ defined(OUTPUT_DATA_TYPE)
+/* OpenCL kernel used to add the offset contribution after @ref CLGEMMLowpMatrixMultiplyKernel and
+ * quantize the result down to uint8.
+ *
+ * This kernel takes a final int32 accumulator value (the output of @ref
+ * CLGEMMLowpMatrixMultiplyKernel), adds to it the offset contribution of matrix A and matrix B and
+ * quantizes to uint8 through the output stage.
+ *
+ *
+ * @attention The k_offset = a_offset * b_offset * k (where k is the number of matrix A columns)
+ * needs to be passed at compile time using -DK_OFFSET (i.e. -DK_OFFSET=1200)
+ * @note In case the offset contribution due to a_offset is required, a_offset needs to be passed at
+ * compile time using -DA_OFFSET (i.e. -DA_OFFSET=1)
+ * @note In case the offset contribution due to b_offset is required, b_offset needs to be passed at
+ * compile time using -DB_OFFSET (i.e. -DB_OFFSET=6)
+ * @note In case sum_col has batches, -DSUM_COL_HAS_BATCHES must be passed at compile time.
+ * Usually, if gemmlowp is used to accelerate a convolution layer, sum_col will not have batches
+ *
+ * The result before the output stage is:
+ *
+ * mm_result[i][k] = mm_result[i][k] +
+ * (sum_col[k] * A_OFFSET) +
+ * (sum_row[i] * B_OFFSET) +
+ * (K_OFFSET)
+ *
+ * This result is quantized down to uint8/int8 using the output stage. The output stage computes the
+ * following operations:
+ *
+ * -# Add offset terms to final result
+ * -# Multiply each entry of result by result_mult_int
+ * -# Add bias to final result (if -DADD_BIAS is passed at compile time)
+ * -# Shift the int32 accumulator by result_shift
+ * -# Clamp the value between the specified min and max bounds (if -DMIN_BOUND and/or -DMAX_BOUND
+ * are passed at compile time)
+ * -# Clamp the resulting int32 values:
+ * - to the [0..255] range and cast to QASYMM8.
+ * - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ * @attention The offset, scalar scale factor and number of bits to shift right of output tensor
+ * must be passed at compile time using -DRESULT_OFFSET, -DRESULT_MULTIPLIER and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] mm_result_ptr Pointer to the source tensor.
+ * Supported data type: S32
+ * @param[in] mm_result_stride_x Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] mm_result_step_x mm_result_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] mm_result_stride_y Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] mm_result_step_y mm_result_stride_y * number of
+ * elements along Y processed per workitem(in bytes)
+ * @param[in] mm_result_stride_z Stride of the source tensor in Z
+ * dimension (in bytes)
+ * @param[in] mm_result_step_z mm_result_stride_z * number of
+ * elements along Z processed per workitem(in bytes)
+ * @param[in] mm_result_offset_first_element_in_bytes The offset of the first element in
+ * the source tensor
+ * @param[in] sum_col_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_col_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_row_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases
+ * tensor. Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases
+ * tensor in X dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the biases tensor
+ * @param[out] dst_ptr                                           Pointer to the destination tensor.
+ *                                                               Supported data type: QASYMM8/QASYMM8_SIGNED
+ * @param[in]  dst_stride_x                                      Stride of the destination tensor in
+ *                                                               X dimension (in bytes)
+ * @param[in]  dst_step_x                                        dst_stride_x * number of elements
+ *                                                               along X processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                                      Stride of the destination tensor in
+ *                                                               Y dimension (in bytes)
+ * @param[in]  dst_step_y                                        dst_stride_y * number of elements
+ *                                                               along Y processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                                      Stride of the destination tensor in
+ *                                                               Z dimension (in bytes)
+ * @param[in]  dst_step_z                                        dst_stride_z * number of elements
+ *                                                               along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in
+ * the destination tensor
+ * @param[in] result_multipliers_ptr (Optional) Pointer to the output
+ * multipliers vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_multipliers_stride_x (Optional) Stride of the output
+ * multipliers vector in X dimension (in bytes)
+ * @param[in] result_multipliers_step_x (Optional)
+ * output_multipliers_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_multipliers_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output multipliers vector
+ * @param[in] result_shifts_ptr (Optional) Pointer to the output
+ * shifts vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_shifts_stride_x (Optional) Stride of the output
+ * shifts vector in X dimension (in bytes)
+ * @param[in] result_shifts_step_x (Optional) output_shifts_stride_x *
+ * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_shifts_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output shifts vector
+ */
+__kernel void gemmlowp_offset_contribution_quantize_down(TENSOR3D_DECLARATION(mm_result)
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+ ,
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst)
+#if defined(PER_CHANNEL_QUANTIZATION)
+ ,
+ VECTOR_DECLARATION(result_multipliers),
+ VECTOR_DECLARATION(result_shifts)
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+)
+{
+ const int x = get_global_id(0) * 4;
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ // Compute offset contribution
+ int4 offset_term_s32 = offset_contribution(
+ x, y, z
+#if defined(A_OFFSET)
+ ,
+ sum_col_ptr, sum_col_stride_x, sum_col_step_x, sum_col_stride_y, sum_col_step_y,
+ sum_col_offset_first_element_in_bytes
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ sum_row_ptr, sum_row_stride_x, sum_row_step_x, sum_row_stride_y, sum_row_step_y,
+ sum_row_offset_first_element_in_bytes
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ biases_ptr, biases_stride_x, biases_step_x, biases_offset_first_element_in_bytes
+#endif // defined(ADD_BIAS)
+ );
+
+ __global uchar *mm_result_addr = mm_result_ptr + mm_result_offset_first_element_in_bytes +
+ x * sizeof(int) + y * mm_result_stride_y +
+ z * mm_result_stride_z;
+
+ int4 in_s32 = vload4(0, (__global int *)mm_result_addr);
+
+ // Add the offset terms to GEMM's result
+ in_s32 += offset_term_s32;
+
+ // -------------- OUTPUT STAGE
+
+ // Add the offset terms to GEMM's result
+ in_s32 += (int4)RESULT_OFFSET;
+
+ // Multiply by result_mult_int and shift
+#if defined(PER_CHANNEL_QUANTIZATION)
+ __global uchar *result_multipliers_addr =
+ result_multipliers_ptr + result_multipliers_offset_first_element_in_bytes + x * sizeof(int);
+ __global uchar *result_shifts_addr =
+ result_shifts_ptr + result_shifts_offset_first_element_in_bytes + x * sizeof(int);
+ int4 result_multipliers_values = vload4(0, (__global int *)result_multipliers_addr);
+ int4 result_shifts_values = vload4(0, (__global int *)result_shifts_addr);
+
+ in_s32 *= result_multipliers_values;
+ in_s32 >>= result_shifts_values;
+#else // defined(PER_CHANNEL_QUANTIZATION)
+ in_s32 *= RESULT_MULTIPLIER;
+
+ in_s32 >>= RESULT_SHIFT;
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(in_s32, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
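+
+/* A host-side C sketch of the output stage performed above on a single accumulator (acc already
+ * includes the offset contribution computed earlier in the kernel), assuming the non-per-channel
+ * path (plain RESULT_MULTIPLIER / RESULT_SHIFT) and uint8 output; the function name is
+ * illustrative. The per-channel variant only differs by indexing the multiplier and shift per
+ * output column.
+ *
+ *   #include <stdint.h>
+ *
+ *   uint8_t quantize_down_ref(int32_t acc, int32_t result_offset, int32_t multiplier, int shift,
+ *                             int32_t min_bound, int32_t max_bound)
+ *   {
+ *     int32_t v = (acc + result_offset) * multiplier;
+ *     v >>= shift;                       // arithmetic right shift, as in the kernel
+ *     if (v < 0) v = 0;                  // saturating cast to QASYMM8 ([0..255])
+ *     if (v > 255) v = 255;
+ *     if (v < min_bound) v = min_bound;  // optional -DMIN_BOUND / -DMAX_BOUND clamp
+ *     if (v > max_bound) v = max_bound;
+ *     return (uint8_t)v;
+ *   }
+ */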
+
+/* OpenCL kernel used to add the offset contribution after matrix multiplication and quantize the
+ * result down to uint8.
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication), adds to
+ * it the offset contribution of matrix A and matrix B and quantizes to uint8 through the output
+ * stage.
+ *
+ *
+ * @attention The k_offset = a_offset * b_offset * k (where k is the number of matrix A columns)
+ * needs to be passed at compile time using -DK_OFFSET (i.e. -DK_OFFSET=1200)
+ * @note In case the offset contribution due to a_offset is required, a_offset needs to be passed at
+ * compile time using -DA_OFFSET (i.e. -DA_OFFSET=1)
+ * @note In case the offset contribution due to b_offset is required, b_offset needs to be passed at
+ * compile time using -DB_OFFSET (i.e. -DB_OFFSET=6)
+ * @note In case sum_col has batches, -DSUM_COL_HAS_BATCHES must be passed at compile time.
+ * Usually, if gemmlowp is used to accelerate a convolution layer, sum_col will not have batches
+ *
+ * The result before the output stage is:
+ *
+ * mm_result[i][k] = mm_result[i][k] +
+ * (sum_col[k] * A_OFFSET) +
+ * (sum_row[i] * B_OFFSET) +
+ * (K_OFFSET)
+ *
+ * This result is quantized down to uint8/int8 using the output stage. The output stage computes the
+ * following operations:
+ *
+ * -# Compute fixed point multiplication between each entry of input by
+ * result_fixedpoint_multiplier
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Round to nearest division by a power-of-two using result_shift
+ * -# Add offset to each result
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values:
+ * - to the [0..255] range and cast to QASYMM8.
+ * - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ * @attention The offset, scalar scale factor and number of bits to shift right of output tensor
+ * must be passed at compile time using -DRESULT_OFFSET, -DRESULT_MULTIPLIER and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] mm_result_ptr Pointer to the source tensor.
+ * Supported data type: S32
+ * @param[in] mm_result_stride_x Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] mm_result_step_x mm_result_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] mm_result_stride_y Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] mm_result_step_y mm_result_stride_y * number of
+ * elements along Y processed per workitem(in bytes)
+ * @param[in] mm_result_stride_z Stride of the source tensor in Z
+ * dimension (in bytes)
+ * @param[in] mm_result_step_z mm_result_stride_z * number of
+ * elements along Z processed per workitem(in bytes)
+ * @param[in] mm_result_offset_first_element_in_bytes The offset of the first element in
+ * the source tensor
+ * @param[in] sum_col_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_col_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_row_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases
+ * tensor. Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases
+ * tensor in X dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the biases tensor
+ * @param[out] dst_ptr                                           Pointer to the destination tensor.
+ *                                                               Supported data type: QASYMM8
+ * @param[in]  dst_stride_x                                      Stride of the destination tensor in
+ *                                                               X dimension (in bytes)
+ * @param[in]  dst_step_x                                        dst_stride_x * number of elements
+ *                                                               along X processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                                      Stride of the destination tensor in
+ *                                                               Y dimension (in bytes)
+ * @param[in]  dst_step_y                                        dst_stride_y * number of elements
+ *                                                               along Y processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                                      Stride of the destination tensor in
+ *                                                               Z dimension (in bytes)
+ * @param[in]  dst_step_z                                        dst_stride_z * number of elements
+ *                                                               along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in
+ * the destination tensor
+ * @param[in] result_multipliers_ptr (Optional) Pointer to the output
+ * multipliers vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_multipliers_stride_x (Optional) Stride of the output
+ * multipliers vector in X dimension (in bytes)
+ * @param[in] result_multipliers_step_x (Optional)
+ * output_multipliers_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_multipliers_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output multipliers vector
+ * @param[in] result_shifts_ptr (Optional) Pointer to the output
+ * shifts vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_shifts_stride_x (Optional) Stride of the output
+ * shifts vector in X dimension (in bytes)
+ * @param[in] result_shifts_step_x (Optional) output_shifts_stride_x *
+ * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_shifts_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output shifts vector
+ */
+__kernel void
+ gemmlowp_offset_contribution_quantize_down_fixedpoint(TENSOR3D_DECLARATION(mm_result)
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+ ,
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst)
+#if defined(PER_CHANNEL_QUANTIZATION)
+ ,
+ VECTOR_DECLARATION(result_multipliers),
+ VECTOR_DECLARATION(result_shifts)
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+ )
+{
+ const int x = get_global_id(0) * 4;
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ // Compute offset contribution
+ int4 offset_term_s32 = offset_contribution(
+ x, y, z
+#if defined(A_OFFSET)
+ ,
+ sum_col_ptr, sum_col_stride_x, sum_col_step_x, sum_col_stride_y, sum_col_step_y,
+ sum_col_offset_first_element_in_bytes
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ sum_row_ptr, sum_row_stride_x, sum_row_step_x, sum_row_stride_y, sum_row_step_y,
+ sum_row_offset_first_element_in_bytes
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ biases_ptr, biases_stride_x, biases_step_x, biases_offset_first_element_in_bytes
+#endif // defined(ADD_BIAS)
+ );
+
+ __global uchar *mm_result_addr = mm_result_ptr + mm_result_offset_first_element_in_bytes +
+ x * sizeof(int) + y * mm_result_stride_y +
+ z * mm_result_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ int4 in_s32 = vload4(0, (__global int *)mm_result_addr);
+
+ // Add the offset terms to GEMM's result
+ in_s32 += offset_term_s32;
+
+ // -------------- OUTPUT STAGE
+
+ // Multiply by result_mult_int and shift
+#if defined(PER_CHANNEL_QUANTIZATION)
+ __global uchar *result_multipliers_addr =
+ result_multipliers_ptr + result_multipliers_offset_first_element_in_bytes + x * sizeof(int);
+ __global uchar *result_shifts_addr =
+ result_shifts_ptr + result_shifts_offset_first_element_in_bytes + x * sizeof(int);
+ int4 result_multipliers_values = vload4(0, (__global int *)result_multipliers_addr);
+ int4 result_shifts_values = vload4(0, (__global int *)result_shifts_addr);
+
+ int4 in_s32_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(
+ in_s32, result_multipliers_values, result_shifts_values, 4);
+ int4 in_s32_shift_gt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(
+ in_s32, result_multipliers_values, result_shifts_values, 4);
+ in_s32 = select(in_s32_shift_lt0, in_s32_shift_gt0, result_shifts_values >= 0);
+#else // defined(PER_CHANNEL_QUANTIZATION)
+
+#if RESULT_SHIFT < 0
+ in_s32 =
+ ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(in_s32, RESULT_MULTIPLIER, RESULT_SHIFT, 4);
+#else // RESULT_SHIFT >= 0
+ in_s32 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(in_s32, RESULT_MULTIPLIER, RESULT_SHIFT, 4);
+#endif // RESULT_SHIFT < 0
+
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+
+ // Add the offset terms to GEMM's result
+ in_s32 += (int4)RESULT_OFFSET;
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(in_s32, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
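+
+/* A simplified host-side C sketch of the fixed-point rescaling used above when RESULT_SHIFT >= 0
+ * (the ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE path): a rounding doubling high multiply by a
+ * Q31 multiplier followed by a rounding right shift. It deliberately ignores the INT32_MIN
+ * saturation corner case and the negative-value rounding nudge that the real helpers handle; the
+ * function name is illustrative.
+ *
+ *   #include <stdint.h>
+ *
+ *   int32_t rescale_ref(int32_t acc, int32_t q31_multiplier, int shift)
+ *   {
+ *     int64_t prod = (int64_t)acc * (int64_t)q31_multiplier;        // Q31 fixed-point product
+ *     int32_t high = (int32_t)((prod + (INT64_C(1) << 30)) >> 31);  // rounding doubling high mul
+ *     int32_t rounding = (shift > 0) ? (1 << (shift - 1)) : 0;      // round-to-nearest division
+ *     return (high + rounding) >> shift;                            // by 2^shift
+ *   }
+ */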
+#endif // defined(RESULT_OFFSET) && defined(RESULT_MULTIPLIER) && defined(RESULT_SHIFT) &&
+ // defined(OUTPUT_DATA_TYPE)
+
+#endif // defined(K_OFFSET)
+
+#if defined(RESULT_OFFSET) && defined(RESULT_MULT_INT) && defined(RESULT_SHIFT)
+/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to
+ * QASYMM8/QASYMM8_SIGNED
+ *
+ * This kernel takes a final int32 accumulator value and processes it to obtain the final
+ * QASYMM8/QASYMM8_SIGNED value. The following computations will be performed by the kernel:
+ *
+ * -# Add offset terms to final result
+ * -# Multiply each entry of result by result_mult_int
+ * -# Add bias to final result (if -DADD_BIAS is passed at compile time)
+ * -# Shift the int32 accumulator by result_shift
+ * -# Clamp the value between the specified min and max bounds (if -DMIN_BOUND and/or -DMAX_BOUND
+ * are passed at compile time)
+ * -# Clamp the resulting int32 values:
+ *    - to the [0..255] range and cast to QASYMM8.
+ *    - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ * @attention The offset, scalar scale factor and number of bits to shift right of output tensor
+ * must be passed at compile time using -DRESULT_OFFSET, -DRESULT_MULT_INT and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ * @param[out] dst_ptr                              Pointer to the destination tensor. Supported
+ *                                                  data type: QASYMM8/QASYMM8_SIGNED
+ * @param[in]  dst_stride_x                         Stride of the destination tensor in X dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_x                           dst_stride_x * number of elements along X
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                         Stride of the destination tensor in Y dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_y                           dst_stride_y * number of elements along Y
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                         Stride of the destination tensor in Z dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_z                           dst_stride_z * number of elements along Z
+ *                                                  processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ */
+__kernel void gemmlowp_output_stage_quantize_down(TENSOR3D_DECLARATION(src),
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ int z = get_global_id(2);
+
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) +
+ y * src_stride_y + z * src_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ int4 input_values = vload4(0, (__global int *)src_addr);
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ input_values += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ // Add the offset terms to GEMM's result
+ input_values += (int4)RESULT_OFFSET;
+
+ // Multiply by result_mult_int and shift
+ input_values *= RESULT_MULT_INT;
+
+#if RESULT_SHIFT < 0
+ input_values >>= -RESULT_SHIFT;
+#else // RESULT_SHIFT >= 0
+ input_values >>= RESULT_SHIFT;
+#endif // RESULT_SHIFT < 0
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(input_values, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
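+
+/* Worked example of the output stage above (all values chosen purely for illustration): with
+ * -DADD_BIAS, -DRESULT_OFFSET=2, -DRESULT_MULT_INT=3 and -DRESULT_SHIFT=4, an accumulator of 100
+ * with a bias of 20 becomes (100 + 20 + 2) * 3 = 366, then 366 >> 4 = 22, which is unchanged by the
+ * saturating cast to QASYMM8. */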
+#endif // defined(RESULT_OFFSET) && defined(RESULT_MULT_INT) && defined(RESULT_SHIFT)
+
+#if defined(RESULT_OFFSET_AFTER_SHIFT) && defined(RESULT_FIXEDPOINT_MULTIPLIER) && \
+ defined(RESULT_SHIFT)
+/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to
+ * QASYMM8/QASYMM8_SIGNED
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication), and
+ * processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. The following computations will be
+ * performed by the kernel:
+ *
+ * -# Compute fixed point multiplication between each entry of input by
+ * result_fixedpoint_multiplier
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Round to nearest division by a power-of-two using result_shift
+ * -# Add offset to each result
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values:
+ * - to the [0..255] range and cast to QASYMM8.
+ * - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ * @attention The offset, scalar scale factor and number of bits to shift right of output tensor
+ * must be passed at compile time using -DRESULT_OFFSET_AFTER_SHIFT, -DRESULT_FIXEDPOINT_MULTIPLIER
+ * and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ * @param[out] dst_ptr                              Pointer to the destination tensor. Supported
+ *                                                  data type: QASYMM8/QASYMM8_SIGNED
+ * @param[in]  dst_stride_x                         Stride of the destination tensor in X dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_x                           dst_stride_x * number of elements along X
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                         Stride of the destination tensor in Y dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_y                           dst_stride_y * number of elements along Y
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                         Stride of the destination tensor in Z dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_z                           dst_stride_z * number of elements along Z
+ *                                                  processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ */
+__kernel void gemmlowp_output_stage_quantize_down_fixedpoint(TENSOR3D_DECLARATION(src),
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ int z = get_global_id(2);
+
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) +
+ y * src_stride_y + z * src_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ int4 input_values = vload4(0, (__global int *)src_addr);
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ input_values += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ // Multiply by result_mult_int and shift
+#if RESULT_SHIFT < 0
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(
+ input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#else // RESULT_SHIFT >= 0
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(
+ input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#endif // RESULT_SHIFT < 0
+
+ // Add the offset terms to GEMM's result
+ input_values += (int4)RESULT_OFFSET_AFTER_SHIFT;
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(input_values, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
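+
+/* For reference, RESULT_FIXEDPOINT_MULTIPLIER and RESULT_SHIFT are typically derived on the host
+ * from the real-valued rescale factor, in the spirit of the gemmlowp/TFLite helpers. A hedged C
+ * sketch (function name illustrative; zero or negative scales are not handled):
+ *
+ *   #include <math.h>
+ *   #include <stdint.h>
+ *
+ *   void quantize_multiplier_ref(double real_multiplier, int32_t *quantized_multiplier, int *shift)
+ *   {
+ *     int exponent = 0;
+ *     const double significand = frexp(real_multiplier, &exponent); // significand in [0.5, 1)
+ *     int64_t q = (int64_t)llround(significand * (double)(INT64_C(1) << 31));
+ *     if (q == (INT64_C(1) << 31)) // rounding can push the significand up to exactly 2^31
+ *     {
+ *       q /= 2;
+ *       ++exponent;
+ *     }
+ *     *quantized_multiplier = (int32_t)q;
+ *     *shift = -exponent; // positive value = right shift, matching RESULT_SHIFT >= 0 above
+ *   }
+ */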
+#endif // defined(RESULT_OFFSET_AFTER_SHIFT) && defined(RESULT_FIXEDPOINT_MULTIPLIER) &&
+ // defined(RESULT_SHIFT)
+
+#if defined(RESULT_FIXEDPOINT_MULTIPLIER) && defined(RESULT_SHIFT)
+
+/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication), and
+ * processes it to obtain the final QSYMM16 value. The following computations will be performed by
+ * the kernel:
+ *
+ * -# Compute fixed point multiplication between each entry of input by
+ * result_fixedpoint_multiplier
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Round to nearest division by a power-of-two using result_shift
+ * -# Add offset to each result
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values to the [-32768..32767] range and cast to QSYMM16.
+ *
+ * @attention The scalar scale factor and number of bits to shift right of the output tensor must
+ * be passed at compile time using -DRESULT_FIXEDPOINT_MULTIPLIER and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ * @param[out] dst_ptr                              Pointer to the destination tensor. Supported
+ *                                                  data type: QSYMM16
+ * @param[in]  dst_stride_x                         Stride of the destination tensor in X dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_x                           dst_stride_x * number of elements along X
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                         Stride of the destination tensor in Y dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_y                           dst_stride_y * number of elements along Y
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                         Stride of the destination tensor in Z dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_z                           dst_stride_z * number of elements along Z
+ *                                                  processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ */
+__kernel void gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16(TENSOR3D_DECLARATION(src),
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ int z = get_global_id(2);
+
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) +
+ y * src_stride_y + z * src_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x * 2 + y * dst_stride_y + z * dst_stride_z;
+
+ int4 input_values = vload4(0, (__global int *)src_addr);
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ input_values += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ // Multiply by result_mult_int and shift
+#if RESULT_SHIFT < 0
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(
+ input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#else // RESULT_SHIFT >= 0
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(
+ input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#endif // RESULT_SHIFT < 0
+
+ short4 res = convert_short4_sat(input_values);
+
+#if defined(MIN_BOUND)
+ res = max(res, (short4)MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (short4)MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global short *)dst_addr);
+}
+#endif // defined(RESULT_FIXEDPOINT_MULTIPLIER) && defined(RESULT_SHIFT)
+
+#if defined(REAL_MULTIPLIER) && defined(OUTPUT_OFFSET)
+/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to
+ * QASYMM8/QASYMM8_SIGNED
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication), and
+ * processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. The following computations will be
+ * performed by the kernel:
+ *
+ * -# Multiply each entry of the input by the real-valued scale factor REAL_MULTIPLIER
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Requantize
+ * -# Add offset to each result
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values:
+ * - to the [0..255] range and cast to QASYMM8.
+ * - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ * @attention The offset and scalar scale factor must be passed at compile time using
+ * -DOUTPUT_OFFSET and -DREAL_MULTIPLIER
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] biases_ptr Pointer to the biases tensor. Supported data
+ * type: same as @p src_ptr
+ * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in
+ * bytes)
+ * @param[in] biases_step_x biases_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases
+ * tensor
+ * @param[out] dst_ptr                              Pointer to the destination tensor. Supported
+ *                                                  data type: QASYMM8
+ * @param[in]  dst_stride_x                         Stride of the destination tensor in X dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_x                           dst_stride_x * number of elements along X
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                         Stride of the destination tensor in Y dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_y                           dst_stride_y * number of elements along Y
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                         Stride of the destination tensor in Z dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_z                           dst_stride_z * number of elements along Z
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_w                         Stride of the destination tensor in W dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_w                           dst_stride_w * number of elements along W
+ *                                                  processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ */
+__kernel void gemmlowp_output_stage_quantize_down_float(TENSOR3D_DECLARATION(src),
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+#if defined(DST_HEIGHT)
+ TENSOR4D_DECLARATION(dst))
+#else // defined(DST_HEIGHT)
+ TENSOR3D_DECLARATION(dst))
+#endif // defined(DST_HEIGHT)
+{
+ // Compute source and destination addresses
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ int z = get_global_id(2);
+
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) +
+ y * src_stride_y + z * src_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ int4 input_values = vload4(0, (__global int *)src_addr);
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ input_values += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ // Convert to float
+ float4 input_values_f = convert_float4(input_values);
+ input_values_f = round(input_values_f * (float)REAL_MULTIPLIER + (float)OUTPUT_OFFSET);
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(input_values_f, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
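+
+/* A host-side C sketch of the float output stage above, for a single accumulator. REAL_MULTIPLIER
+ * is typically (scale_a * scale_b) / scale_output and OUTPUT_OFFSET the output zero point; that
+ * interpretation and the function name are assumptions, while the arithmetic mirrors the kernel.
+ *
+ *   #include <math.h>
+ *   #include <stdint.h>
+ *
+ *   uint8_t quantize_down_float_ref(int32_t acc, float real_multiplier, float output_offset)
+ *   {
+ *     const int32_t q = (int32_t)roundf((float)acc * real_multiplier + output_offset);
+ *     return (uint8_t)(q < 0 ? 0 : (q > 255 ? 255 : q)); // saturating cast to QASYMM8
+ *   }
+ */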
+#endif // defined(REAL_MULTIPLIER) && defined(OUTPUT_OFFSET)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+#if defined(DATA_TYPE) && defined(CONSTANT_VALUE) // Check for compile time constants
+
+/** Fill the tensor's planes with the given value
+ * @attention The following variables must be passed at compile time:
+ * -# -DDATA_TYPE = Tensor data type. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * -# -DCONSTANT_VALUE = The value used to fill the tensor's planes
+ * -# -DVEC_SIZE = Vector size
+ * -# -DLAST_ACCESSED_X = The element that is on the X border (threads trying to set this, might
+ * need to step back a bit)
+ *
+ * @param[in] tensor_ptr Pointer to the source image. Data types
+ * supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] tensor_stride_x Stride of the source image in X dimension (in
+ * bytes)
+ * @param[in] tensor_step_x tensor_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] tensor_stride_y Stride of the source image in Y dimension (in
+ * bytes)
+ * @param[in] tensor_step_y tensor_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] tensor_offset_first_element_in_bytes The offset of the first element in the source
+ * image
+ * @note The fill value is not a kernel argument; it is provided at compile time via -DCONSTANT_VALUE
+ */
+__kernel void memset(TENSOR3D_DECLARATION(tensor))
+{
+ Tensor3D tensor = CONVERT_TO_TENSOR3D_STRUCT(tensor);
+
+#if defined(VEC_SIZE)
+
+#if defined(LAST_ACCESSED_X)
+ // Check if access on width gets out of bounds
+ // If it does shift access vector to access elements within bounds
+ const int xi = (int)(get_global_id(0) * VEC_SIZE);
+ tensor.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * tensor_stride_x;
+#endif // defined(LAST_ACCESSED_X)
+
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ data = (DATA_TYPE)(CONSTANT_VALUE);
+
+ VSTORE(VEC_SIZE)
+ (data, 0, (__global DATA_TYPE *)tensor.ptr);
+#else // !defined(VEC_SIZE)
+ *((__global DATA_TYPE *)(tensor.ptr)) = (DATA_TYPE)(CONSTANT_VALUE);
+#endif // defined(VEC_SIZE)
+}
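+
+/* Illustrative host-side usage: the fill value is not a kernel argument, everything is baked in
+ * through build options. The option values below and the pre-existing `program`/`device` handles
+ * are examples only.
+ *
+ *   const char *options =
+ *     "-DDATA_TYPE=float -DCONSTANT_VALUE=0.0f -DVEC_SIZE=4 -DLAST_ACCESSED_X=12";
+ *   cl_int err = clBuildProgram(program, 1, &device, options, NULL, NULL);
+ */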
+
+#endif // Check for compile time constants
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+#if defined(DATA_TYPE) && defined(SELECT_DT) && defined(VEC_SIZE) && defined(PAD_X_BEFORE) && \
+ defined(SRC_WIDTH)
+
+#define VEC_TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
+#define VEC_SELECT VEC_DATA_TYPE(SELECT_DT, VEC_SIZE)
+#define OFFSETS VEC_OFFS(VEC_SELECT, VEC_SIZE)
+
+#if defined(CONST_VAL)
+/** Perform a pad operation when PaddingMode is CONSTANT
+ *
+ * @note Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
+ * @note Vector size must be passed using the -DVEC_SIZE compile flag, e.g. -DVEC_SIZE=4
+ * @note Constant value used to fill the pads must be passed using the -DCONST_VAL compile flag,
+ * e.g. -DCONST_VAL=1.27
+ * @note Pad to add to the left must be passed using the -DPAD_X_BEFORE compile flag, e.g.
+ * -DPAD_X_BEFORE=5
+ * @note Input tensor's width must be passed using the -DSRC_WIDTH compile flag, e.g.
+ * -DSRC_WIDTH=224
+ * @note Data type to use for the select instruction must be passed using the -DSELECT_DT compile
+ * flag, e.g. -DSELECT_DT=float
+ * @note In case pad left is more than the vector size, the number of threads to skip along the X
+ * axis must be passed using the -DNUM_THREADS_TO_SKIP_X compile flag, e.g.
+ * -DNUM_THREADS_TO_SKIP_X=1. This is defined as (PAD_X_BEFORE / VEC_SIZE)
+ * @note If pad also needs to be added to the top of the tensor, the following compile flags must be
+ * passed at compile time:
+ * -# -DPAD_Y_BEFORE: Pad to add to the top of the input tensor (e.g. -DPAD_Y_BEFORE=3)
+ * -# -DSRC_HEIGHT: Input tensor's height (e.g. -DSRC_HEIGHT=127)
+ * @note If pad also needs to be added to the depth of the tensor, the following compile flags must
+ * be passed at compile time:
+ * -# -DPAD_Z_BEFORE: Pad to add before the first plane of the input tensor (e.g.
+ * -DPAD_Z_BEFORE=3)
+ * -# -DSRC_DEPTH: Input tensor's depth (e.g. -DSRC_DEPTH=32)
+ * @note If pad also needs to be added to the batch of the tensor, the following compile flags must
+ * be passed at compile time:
+ * -# -DPAD_W_BEFORE: Pad to add before the first batch of the input tensor (e.g.
+ * -DPAD_W_BEFORE=3)
+ * -# -DSRC_BATCH: Input tensor's batch size (e.g. -DSRC_BATCH=4)
+ *
+ * @param[in] src_ptr Pointer to the source image. Supported data types:
+ * U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32
+ * @param[in] src_stride_x Stride of the source image in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source image in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source image in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] dst_ptr Pointer to the destination image. Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination image in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination image in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination image in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * image
+ * @param[in] batch (Optional) Batch index if 4D pad must be applied
+ */
+__kernel void pad_layer_constant(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst)
+#if defined(PAD_W_BEFORE)
+ ,
+ uint batch
+#endif // defined(PAD_W_BEFORE)
+)
+{
+ const int x = get_global_id(0);
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ uint cond = 0;
+
+#if defined(PAD_W_BEFORE)
+ cond |= batch < PAD_W_BEFORE || batch >= (SRC_BATCH + PAD_W_BEFORE);
+#endif // defined(PAD_W_BEFORE)
+#if defined(PAD_Z_BEFORE)
+ cond |= z < PAD_Z_BEFORE || z >= (SRC_DEPTH + PAD_Z_BEFORE);
+#endif // defined(PAD_Z_BEFORE)
+
+ if (cond)
+ {
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+ VSTORE(VEC_SIZE)
+ ((VEC_TYPE)CONST_VAL, 0, (__global DATA_TYPE *)dst.ptr);
+ }
+ else
+ {
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+#if defined(NUM_THREADS_TO_SKIP_X)
+ /* In case the pad left is greater than the vector size, and we are past the threads operating
+ * solely on pad values, the input pointer must be brought back along the X axis to start from
+ * the first non-pad values.
+ *
+ * E.g. with VEC_SIZE=2, PAD_X_BEFORE=5, CONST_VAL=0 and 1D input |1 2 3 4 5 6|:
+ * -# The first thread will compute the output values |0 0| since it detects (x_outs == (0, 1))
+ * < PAD_X_BEFORE
+ * -# The second thread will compute the output values |0 0| since it detects (x_outs == (2,
+ * 3)) < PAD_X_BEFORE
+ * -# The third thread should compute |0 1|, however the input pointer is now ahead of ((x *
+ * VEC_SIZE) == 4) values, reading |4 5|
+   * -# To detect this, we use ((PAD_X_BEFORE / VEC_SIZE) == NUM_THREADS_TO_SKIP_X == 2) and
+   * check that the current x is greater than or equal to it
+   * -# So, we bring the pointer back by NUM_THREADS_TO_SKIP_X threads, which means multiplying
+   * this constant by the input's step along the X axis
+   * -# Now that the pointer is back by ((NUM_THREADS_TO_SKIP_X * src_step_x) == 4) values, it
+   * will read the desired values |0 1|
+ */
+ src.ptr -= select(0u, NUM_THREADS_TO_SKIP_X * src_step_x, x >= NUM_THREADS_TO_SKIP_X);
+#endif // defined(NUM_THREADS_TO_SKIP_X)
+#if defined(PAD_Z_BEFORE)
+ src.ptr -= PAD_Z_BEFORE * src_step_z;
+#endif // defined(PAD_Z_BEFORE)
+#if defined(PAD_W_BEFORE)
+ src.ptr -= PAD_W_BEFORE * SRC_DEPTH * src_step_z;
+#endif // defined(PAD_W_BEFORE)
+
+ VEC_TYPE src_vals = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr);
+
+ VEC_INT xs_out = (VEC_INT)(x * VEC_SIZE) + CONVERT(OFFSETS, VEC_INT);
+ VEC_INT cond = xs_out < (VEC_INT)PAD_X_BEFORE || xs_out >= (VEC_INT)(SRC_WIDTH + PAD_X_BEFORE);
+#if defined(PAD_Y_BEFORE)
+ cond |=
+ (VEC_INT)y < (VEC_INT)PAD_Y_BEFORE || (VEC_INT)y >= (VEC_INT)(SRC_HEIGHT + PAD_Y_BEFORE);
+#endif // defined(PAD_Y_BEFORE)
+ VSTORE(VEC_SIZE)
+ (select(src_vals, (VEC_TYPE)CONST_VAL, CONVERT(cond, VEC_SELECT)), 0,
+ (__global DATA_TYPE *)dst.ptr);
+ }
+}
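+
+/* Illustrative example, assuming a float input with SRC_WIDTH=224, PAD_X_BEFORE=5 and VEC_SIZE=4:
+ * the host would build this kernel with flags along the lines of
+ *
+ *   -DDATA_TYPE=float -DSELECT_DT=int -DVEC_SIZE=4 -DCONST_VAL=0.0f
+ *   -DPAD_X_BEFORE=5 -DSRC_WIDTH=224 -DNUM_THREADS_TO_SKIP_X=1
+ *
+ * where NUM_THREADS_TO_SKIP_X = PAD_X_BEFORE / VEC_SIZE = 5 / 4 = 1, i.e. one whole work-item
+ * writes nothing but CONST_VAL before the first input element is needed.
+ */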
+#endif // defined(CONST_VAL)
+
+#if defined(PAD_X_BEFORE_REMAINDER) && defined(PAD_X_AFTER_REMAINDER) && \
+ defined(PAD_X_BEFORE_REMAINDER_REFL) && defined(PAD_X_AFTER_REMAINDER_REFL) && \
+ defined(AFTER_PAD_FACT_X)
+
+#define SCALAR_COND(x) (VEC_SELECT) x == (VEC_SELECT)1
+#define ROTATE_REVERSE(x, n) ROTATE(REVERSE(x, VEC_SIZE), VEC_SIZE, n)
+#define SYMM_REFL_LEFT(x, n0, n1) \
+ select(ROTATE_REVERSE(x, n1), ROTATE(x, VEC_SIZE, n0), OFFSETS >= (VEC_SELECT)n0)
+#define SYMM_REFL_RIGHT(x, n0, n1) \
+ select(ROTATE(x, VEC_SIZE, n0), ROTATE_REVERSE(x, n1), OFFSETS >= (VEC_SELECT)n0)
+
+/** Perform a pad operation when PaddingMode is SYMMETRIC
+ *
+ * @note Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
+ * @note Vector size must be passed using the -DVEC_SIZE compile flag, e.g. -DVEC_SIZE=4
+ * @note Constant value must be passed using the -DCONST_VAL compile flag, e.g. -DCONST_VAL=1.27
+ * @note Pad to add to the left must be passed using the -DPAD_X_BEFORE compile flag, e.g.
+ * -DPAD_X_BEFORE=5
+ * @note Input tensor's width must be passed using the -DSRC_WIDTH compile flag, e.g.
+ * -DSRC_WIDTH=224
+ * @note Data type to use for the select instruction must be passed using the -DSELECT_DT compile
+ * flag, e.g. -DSELECT_DT=float
+ * @note Number of values to the left when operating across left padding must be passed using the
+ * -DPAD_X_BEFORE_REMAINDER compile flag, e.g. -DPAD_X_BEFORE_REMAINDER=5
+ * @note Number of values to the left when operating across right padding must be passed using the
+ * -DPAD_X_AFTER_REMAINDER compile flag, e.g. -DPAD_X_AFTER_REMAINDER=6
+ * @note To rearrange the vectors properly, (PAD_X_BEFORE_REMAINDER + 1) must be passed when mode
+ * is REFLECT using the -DPAD_X_BEFORE_REMAINDER_REFL compile flag, e.g.
+ * -DPAD_X_BEFORE_REMAINDER_REFL=6
+ * @note To rearrange the vectors properly, (PAD_X_AFTER_REMAINDER - 1) must be passed using the
+ * -DPAD_X_AFTER_REMAINDER_REFL compile flag, e.g. -DPAD_X_AFTER_REMAINDER_REFL=5
+ * @note When after pad X, starting point to read backward from must be passed using the
+ * -DAFTER_PAD_FACT_X compile flag, e.g. -DAFTER_PAD_FACT_X=253
+ * @note If padding mode is REFLECT, the -DIS_REFLECT compile flag must be set to 1, else it must be
+ * set to 0
+ * @note If pad also needs to be added to the top of the tensor, the following compile flags must be
+ * passed at compile time:
+ * -# -DPAD_Y_BEFORE: Pad to add to the top of the input tensor (e.g. -DPAD_Y_BEFORE=3)
+ * -# -DSRC_HEIGHT: Input tensor's height (e.g. -DSRC_HEIGHT=127)
+ * @note If pad also needs to be added to the depth of the tensor, the following compile flags must
+ * be passed at compile time:
+ * -# -DPAD_Z_BEFORE: Pad to add before the first plane of the input tensor (e.g.
+ * -DPAD_Z_BEFORE=3)
+ * -# -DSRC_DEPTH: Input tensor's depth (e.g. -DSRC_DEPTH=32)
+ * @note If the starting point to read backward from is less than the output's last element accessed
+ * in the X, the following compile flags must be passed at compile time to avoid negative offsets:
+ * -# -DAFTER_PAD_REM: Defines how much to rotate the vector if the backward calculation
+ * attempted to read from a negative offset (e.g. -DAFTER_PAD_REM=3)
+ *
+ * @param[in] src_ptr Pointer to the source image. Supported data types:
+ * U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32
+ * @param[in] src_stride_x Stride of the source image in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source image in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source image in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] dst_ptr Pointer to the destination image. Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination image in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination image in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination image in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * image
+ */
+__kernel void pad_layer_symmetric_reflect(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Get current thread position
+ const int x = get_global_id(0);
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ // Define conditions based on the thread X position w.r.t. pad left and right
+ const int x_out_first = x * VEC_SIZE;
+ const int x_out_last = x_out_first + VEC_SIZE;
+ const int is_before_pad_left = (x_out_last <= PAD_X_BEFORE);
+ const int is_across_pad_left = (x_out_first < PAD_X_BEFORE) && (x_out_last > PAD_X_BEFORE);
+ const int is_inside_input =
+ (x_out_first >= PAD_X_BEFORE) && (x_out_last <= (SRC_WIDTH + PAD_X_BEFORE));
+ const int is_across_pad_right =
+ (x_out_first < (SRC_WIDTH + PAD_X_BEFORE)) && (x_out_last > (SRC_WIDTH + PAD_X_BEFORE));
+ const int is_after_pad_right = (x_out_first >= (SRC_WIDTH + PAD_X_BEFORE));
+
+ // Calculate base pointers
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes;
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+ // Calculate input tensor's offset based on the defined conditions
+ int x_offset = 0;
+ x_offset = select(x_offset, PAD_X_BEFORE - x_out_last + IS_REFLECT, is_before_pad_left);
+ x_offset = select(x_offset, x_out_first - PAD_X_BEFORE, is_inside_input);
+ x_offset = select(x_offset, SRC_WIDTH - VEC_SIZE, is_across_pad_right);
+ x_offset = select(x_offset, AFTER_PAD_FACT_X - x_out_last, is_after_pad_right);
+
+#if defined(AFTER_PAD_REM)
+ int neg_offs = x_offset < 0;
+ x_offset = max(x_offset, 0);
+#endif // defined(AFTER_PAD_REM)
+
+ // Load input values from the computed offset
+ int y_in = y;
+ int z_in = z;
+#if defined(PAD_Y_BEFORE)
+ y_in = select(y - PAD_Y_BEFORE, PAD_Y_BEFORE - y + IS_REFLECT - 1, y < PAD_Y_BEFORE);
+ y_in = select(y_in, 2 * SRC_HEIGHT + PAD_Y_BEFORE - y - IS_REFLECT - 1,
+ y >= (SRC_HEIGHT + PAD_Y_BEFORE));
+#endif // defined(PAD_Y_BEFORE)
+#if defined(PAD_Z_BEFORE)
+ z_in = select(z - PAD_Z_BEFORE, PAD_Z_BEFORE - z + IS_REFLECT - 1, z < PAD_Z_BEFORE);
+ z_in = select(z_in, 2 * SRC_DEPTH + PAD_Z_BEFORE - z - IS_REFLECT - 1,
+ z >= (SRC_DEPTH + PAD_Z_BEFORE));
+#endif // defined(PAD_Z_BEFORE)
+
+ src_addr += x_offset * src_stride_x + y_in * src_step_y + z_in * src_step_z;
+
+#if SRC_WIDTH == 1
+ VSTORE(VEC_SIZE)
+ ((VEC_TYPE)(*(__global DATA_TYPE *)src_addr), 0, (__global DATA_TYPE *)dst.ptr);
+#else // SRC_WIDTH == 1
+
+ VEC_TYPE src_vals = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src_addr);
+
+ // Choose rearrangement policy based on the defined conditions
+ src_vals =
+ select(src_vals, SYMM_REFL_LEFT(src_vals, PAD_X_BEFORE_REMAINDER, PAD_X_BEFORE_REMAINDER_REFL),
+ SCALAR_COND(is_across_pad_left));
+ src_vals =
+ select(src_vals, SYMM_REFL_RIGHT(src_vals, PAD_X_AFTER_REMAINDER, PAD_X_AFTER_REMAINDER_REFL),
+ SCALAR_COND(is_across_pad_right));
+ src_vals = select(src_vals, REVERSE(src_vals, VEC_SIZE),
+ SCALAR_COND((is_before_pad_left || is_after_pad_right)));
+#if defined(AFTER_PAD_REM)
+ src_vals = select(src_vals, ROTATE(src_vals, VEC_SIZE, AFTER_PAD_REM), SCALAR_COND(neg_offs));
+#endif // defined(AFTER_PAD_REM)
+
+ // Store
+ VSTORE(VEC_SIZE)
+ (src_vals, 0, (__global DATA_TYPE *)dst.ptr);
+#endif // SRC_WIDTH == 1
+}
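+
+/* Illustrative example, assuming PAD_X_BEFORE=5, SRC_WIDTH=224, VEC_SIZE=4 and REFLECT mode
+ * (IS_REFLECT=1): the host-side configure() later in this patch derives
+ *
+ *   PAD_X_BEFORE_REMAINDER      = 5 % 4           = 1
+ *   PAD_X_AFTER_REMAINDER       = (224 + 5) % 4   = 1
+ *   PAD_X_BEFORE_REMAINDER_REFL = (1 + 1) % 4     = 2
+ *   PAD_X_AFTER_REMAINDER_REFL  = (1 - 1) % 4     = 0
+ *   AFTER_PAD_FACT_X            = 2 * 224 + 5 - 1 = 452
+ */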
+#endif // defined(PAD_X_BEFORE_REMAINDER) && defined(PAD_X_AFTER_REMAINDER) &&
+ // defined(PAD_X_BEFORE_REMAINDER_REFL) && defined(PAD_X_AFTER_REMAINDER_REFL) &&
+ // defined(AFTER_PAD_FACT_X)
+#endif // defined(DATA_TYPE) && defined(SELECT_DT) && defined(VEC_SIZE) && defined(PAD_X_BEFORE) &&
+ // defined(SRC_WIDTH)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_REPEAT_H
+#define ARM_COMPUTE_REPEAT_H
+
+#include "helpers.h"
+
+/** Macros that help in loop unrolling */
+// Repeat macros with 3 params, excluding the implicit ID param
+#define REPEAT_3_1(P_X, P_A, P_B, P_C) P_X##_DEF(0, P_A, P_B, P_C)
+#define REPEAT_3_2(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(1, P_A, P_B, P_C); \
+ REPEAT_3_1(P_X, P_A, P_B, P_C)
+#define REPEAT_3_3(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(2, P_A, P_B, P_C); \
+ REPEAT_3_2(P_X, P_A, P_B, P_C)
+#define REPEAT_3_4(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(3, P_A, P_B, P_C); \
+ REPEAT_3_3(P_X, P_A, P_B, P_C)
+#define REPEAT_3_5(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(4, P_A, P_B, P_C); \
+ REPEAT_3_4(P_X, P_A, P_B, P_C)
+#define REPEAT_3_6(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(5, P_A, P_B, P_C); \
+ REPEAT_3_5(P_X, P_A, P_B, P_C)
+#define REPEAT_3_7(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(6, P_A, P_B, P_C); \
+ REPEAT_3_6(P_X, P_A, P_B, P_C)
+#define REPEAT_3_8(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(7, P_A, P_B, P_C); \
+ REPEAT_3_7(P_X, P_A, P_B, P_C)
+#define REPEAT_3_9(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(8, P_A, P_B, P_C); \
+ REPEAT_3_8(P_X, P_A, P_B, P_C)
+#define REPEAT_3_10(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(9, P_A, P_B, P_C); \
+ REPEAT_3_9(P_X, P_A, P_B, P_C)
+#define REPEAT_3_11(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(A, P_A, P_B, P_C); \
+ REPEAT_3_10(P_X, P_A, P_B, P_C)
+#define REPEAT_3_12(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(B, P_A, P_B, P_C); \
+ REPEAT_3_11(P_X, P_A, P_B, P_C)
+#define REPEAT_3_13(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(C, P_A, P_B, P_C); \
+ REPEAT_3_12(P_X, P_A, P_B, P_C)
+#define REPEAT_3_14(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(D, P_A, P_B, P_C); \
+ REPEAT_3_13(P_X, P_A, P_B, P_C)
+#define REPEAT_3_15(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(E, P_A, P_B, P_C); \
+ REPEAT_3_14(P_X, P_A, P_B, P_C)
+#define REPEAT_3_16(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(F, P_A, P_B, P_C); \
+ REPEAT_3_15(P_X, P_A, P_B, P_C)
+
+#define REPEAT_DEF_3_N(P_NUM, P_OP, P_A, P_B, P_C) \
+ REPEAT_3_##P_NUM(P_OP, P_A, P_B, P_C) // One level of indirection to ensure order of expansion
+ // does not affect preprocessing P_NUM
+#define REPEAT_3_N(P_NUM, P_OP, P_A, P_B, P_C) REPEAT_DEF_3_N(P_NUM, P_OP, P_A, P_B, P_C)
+
+// Repeat macros with 4 params, excluding the implicit ID param
+#define REPEAT_4_1(P_X, P_A, P_B, P_C, P_D) P_X##_DEF(0, P_A, P_B, P_C, P_D)
+#define REPEAT_4_2(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(1, P_A, P_B, P_C, P_D); \
+ REPEAT_4_1(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_3(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(2, P_A, P_B, P_C, P_D); \
+ REPEAT_4_2(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_4(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(3, P_A, P_B, P_C, P_D); \
+ REPEAT_4_3(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_5(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(4, P_A, P_B, P_C, P_D); \
+ REPEAT_4_4(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_6(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(5, P_A, P_B, P_C, P_D); \
+ REPEAT_4_5(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_7(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(6, P_A, P_B, P_C, P_D); \
+ REPEAT_4_6(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_8(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(7, P_A, P_B, P_C, P_D); \
+ REPEAT_4_7(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_9(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(8, P_A, P_B, P_C, P_D); \
+ REPEAT_4_8(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_10(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(9, P_A, P_B, P_C, P_D); \
+ REPEAT_4_9(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_11(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(A, P_A, P_B, P_C, P_D); \
+ REPEAT_4_10(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_12(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(B, P_A, P_B, P_C, P_D); \
+ REPEAT_4_11(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_13(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(C, P_A, P_B, P_C, P_D); \
+ REPEAT_4_12(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_14(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(D, P_A, P_B, P_C, P_D); \
+ REPEAT_4_13(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_15(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(E, P_A, P_B, P_C, P_D); \
+ REPEAT_4_14(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_16(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(F, P_A, P_B, P_C, P_D); \
+ REPEAT_4_15(P_X, P_A, P_B, P_C, P_D)
+
+#define REPEAT_DEF_4_N(P_NUM, P_OP, P_A, P_B, P_C, P_D) \
+ REPEAT_4_##P_NUM(P_OP, P_A, P_B, P_C, P_D) // One level of indirection to ensure order of
+ // expansion does not affect preprocessing P_NUM
+#define REPEAT_4_N(P_NUM, P_OP, P_A, P_B, P_C, P_D) REPEAT_DEF_4_N(P_NUM, P_OP, P_A, P_B, P_C, P_D)
+
+// Macro for initializing N variables. Generates N statements that define VAR##N =
+// RHS_ACCESSOR_DEF(...)
+#define VAR_INIT_TO_CONST_DEF(ID, TYPE, VAR, VAL) TYPE VAR##ID = VAL
+#define REPEAT_VAR_INIT_TO_CONST(N, TYPE, VAR, VAL) REPEAT_3_N(N, VAR_INIT_TO_CONST, TYPE, VAR, VAL)
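+
+// Illustrative expansion, assuming the caller names below:
+//   REPEAT_VAR_INIT_TO_CONST(3, int4, acc, 0)
+// goes through REPEAT_3_N -> REPEAT_3_3 and produces the three statements
+//   int4 acc2 = 0; int4 acc1 = 0; int4 acc0 = 0;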
+
+// Macro for initializing N variables by converting the data type. Generates N statements that
+// define VAR##N = RHS_ACCESSOR_DEF(...)
+#define VAR_INIT_CONVERT_SAT_DEF(ID, TYPE_OUT, VAR_IN, VAR_OUT) \
+ TYPE_OUT VAR_OUT##ID = CONVERT_SAT(VAR_IN##ID, TYPE_OUT)
+#define REPEAT_VAR_INIT_CONVERT_SAT(N, TYPE_OUT, VAR_IN, VAR_OUT) \
+ REPEAT_3_N(N, VAR_INIT_CONVERT_SAT, TYPE_OUT, VAR_IN, VAR_OUT)
+
+// Macro for adding a constant to N variables. Generates N statements that define VAR##N
+// = RHS_ACCESSOR_DEF(...)
+#define ADD_CONST_TO_VAR_DEF(ID, TYPE, VAR, VAL) VAR##ID += (TYPE)VAL
+#define REPEAT_ADD_CONST_TO_VAR(N, TYPE, VAR, VAL) REPEAT_3_N(N, ADD_CONST_TO_VAR, TYPE, VAR, VAL)
+
+// Macro for multiplying N variables (VAR_B) by a constant (VAL) and adding to other N variables
+// (VAR_A). Generates N statements that define VAR_A##N = RHS_ACCESSOR_DEF(...)
+#define MLA_VAR_WITH_CONST_VEC_DEF(ID, VAR_A, VAR_B, VAL) VAR_A##ID += VAR_B##ID * VAL
+#define REPEAT_MLA_VAR_WITH_CONST_VEC(N, VAR_A, VAR_B, VAL) \
+ REPEAT_3_N(N, MLA_VAR_WITH_CONST_VEC, VAR_A, VAR_B, VAL)
+
+// Macro for adding a vector to N variables. Generates N statements that define VAR##N
+// = RHS_ACCESSOR_DEF(...)
+#define ADD_VECTOR_TO_VAR_DEF(ID, TYPE, VAR, VEC) VAR##ID += VEC
+#define REPEAT_ADD_VECTOR_TO_VAR(N, VAR, VEC) REPEAT_3_N(N, ADD_VECTOR_TO_VAR, "", VAR, VEC)
+
+// Macro for adding two N variables. Generates N statements that define VAR_A##N
+// = RHS_ACCESSOR_DEF(...)
+#define ADD_TWO_VARS_DEF(ID, TYPE, VAR_A, VAR_B) VAR_A##ID += VAR_B##ID
+#define REPEAT_ADD_TWO_VARS(N, VAR_A, VAR_B) REPEAT_3_N(N, ADD_TWO_VARS, "", VAR_A, VAR_B)
+
+// Macro for performing Max between a constant and N variables. Generates N statements that
+// define VAR##N = RHS_ACCESSOR_DEF(...)
+#define MAX_CONST_VAR_DEF(ID, TYPE, VAR, VAL) VAR##ID = max(VAR##ID, (TYPE)VAL)
+#define REPEAT_MAX_CONST_VAR(N, TYPE, VAR, VAL) REPEAT_3_N(N, MAX_CONST_VAR, TYPE, VAR, VAL)
+
+// Macro for performing Min between a constant and N variables. Generates N statements that
+// define VAR##N = RHS_ACCESSOR_DEF(...)
+#define MIN_CONST_VAR_DEF(ID, TYPE, VAR, VAL) VAR##ID = min(VAR##ID, (TYPE)VAL)
+#define REPEAT_MIN_CONST_VAR(N, TYPE, VAR, VAL) REPEAT_3_N(N, MIN_CONST_VAR, TYPE, VAR, VAL)
+
+// Macro for applying ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE to N variables. Generates N
+// statements that define VAR##N = RHS_ACCESSOR_DEF(...)
+#define ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE_DEF(ID, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ VAR##ID = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(VAR##ID, RES_MUL, RES_SHIFT, SIZE)
+#define REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(N, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ REPEAT_4_N(N, ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE, SIZE, VAR, RES_MUL, RES_SHIFT)
+
+// Macro for applying ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE to N variables. Generates N
+// statements that define VAR##N = RHS_ACCESSOR_DEF(...)
+#define ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE_DEF(ID, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ VAR##ID = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(VAR##ID, RES_MUL, RES_SHIFT, SIZE)
+#define REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(N, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ REPEAT_4_N(N, ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE, SIZE, VAR, RES_MUL, RES_SHIFT)
+
+// Macro for applying per-channel ASYMM_MULT_BY_QUANT_MULTIPLIER to N variables.
+#define ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL_DEF(ID, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ ({ \
+ VEC_DATA_TYPE(int, N0) \
+ VAR##ID_shift_lt0 = \
+ ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(VAR##ID, RES_MUL, RES_SHIFT, N0); \
+ VEC_DATA_TYPE(int, N0) \
+ VAR##ID_shift_gt0 = \
+ ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(VAR##ID, RES_MUL, RES_SHIFT, N0); \
+ VAR##ID = select(VAR##ID_shift_lt0, VAR##ID_shift_gt0, RES_SHIFT >= 0); \
+ })
+#define REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL(N, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ REPEAT_4_N(N, ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL, SIZE, VAR, RES_MUL, RES_SHIFT)
+
+#endif // ARM_COMPUTE_REPEAT_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+/** Perform tensor reshape
+ *
+ * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g.
+ * -DDATA_TYPE=short
+ *
+ * @param[in] input_ptr Pointer to the first source tensor. Supported
+ * data types: All
+ * @param[in] input_stride_x Stride of the first source tensor in X dimension
+ * (in bytes)
+ * @param[in] input_step_x input_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_y Stride of the first source tensor in Y dimension
+ * (in bytes)
+ * @param[in] input_step_y input_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_z Stride of the first source tensor in Z dimension
+ * (in bytes)
+ * @param[in] input_step_z input_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first
+ * source tensor
+ * @param[out] output_ptr Pointer to the destination tensor. Supported
+ * data types: same as @p input_ptr
+ * @param[in] output_stride_x Stride of the destination tensor in X dimension
+ * (in bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the destination tensor in Y dimension
+ * (in bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] output_step_z output_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ * @param[in] input_shape Input spatial shape
+ * @param[in] output_shape Output spatial shape
+ */
+__kernel void reshape_layer(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output),
+ int2 input_shape, int2 output_shape)
+{
+ Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(input);
+ Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(output);
+
+ int3 id = (int3)(get_global_id(0), get_global_id(1), get_global_id(2));
+
+ // Linearize index
+ int linear_idx = id.x + id.y * input_shape.x + id.z * input_shape.x * input_shape.y;
+
+ // Translate to output
+ int3 out_id;
+ out_id.x = linear_idx % output_shape.x;
+ out_id.y = (linear_idx / output_shape.x) % output_shape.y;
+ out_id.z = linear_idx / (output_shape.x * output_shape.y);
+
+ // Store result
+ *((__global DATA_TYPE *)tensor3D_offset(&out, out_id.x, out_id.y, out_id.z)) =
+ *((__global DATA_TYPE *)in.ptr);
+}
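+
+/* Illustrative example, assuming a 4x2 input plane reshaped into 2x4: the work-item at
+ * id = (3, 1, 0) linearizes to 3 + 1 * 4 = 7, which maps to out_id = (7 % 2, (7 / 2) % 4, 7 / 8)
+ * = (1, 3, 0), i.e. the element keeps its position in the flattened order.
+ */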
*/
#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/CLValidate.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/AccessWindowStatic.h"
#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
#include <cstddef>
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/core/UtilsEx.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
_hits = hits;
// Make _lookup_indices tensor
- _lookup_indices = support::cpp14::make_unique<CLTensor>();
+ _lookup_indices = std::make_unique<CLTensor>();
_lookup_indices->allocator()->init(
TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32));
_lookup_indices->allocator()->allocate();
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
+
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
#include "support/ToolchainSupport.h"
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+CLMemsetKernel::CLMemsetKernel() : ICLKernel(), _tensor(nullptr), _full_window() {}
+
+void CLMemsetKernel::configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), tensor, constant_value, window);
+}
+
+void CLMemsetKernel::configure(const CLCompileContext &compile_context, ICLTensor *tensor,
+ const PixelValue &constant_value, Window *window)
+{
+ ARM_COMPUTE_UNUSED(compile_context);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
+ ARM_COMPUTE_ERROR_THROW_ON(validate(tensor->info(), constant_value, window));
+
+ _tensor = tensor;
+
+ const DataType data_type = tensor->info()->data_type();
+ const int vec_size_x = 16 / tensor->info()->element_size();
+
+ // Create and update the window (if needed)
+ _full_window = calculate_max_window(*tensor->info());
+ Window win = _full_window;
+ if (window != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(win, *window);
+ win = *window;
+ }
+
+ const int output_width_x = win.num_iterations(0);
+ const bool multi_access_x = output_width_x >= vec_size_x;
+ const bool remainder_x = output_width_x % vec_size_x > 0;
+
+ if (multi_access_x)
+ {
+ win.set(
+ Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
+ }
+ ICLKernel::configure_internal(win);
+
+ // Create kernel
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
+ build_opts.add_option("-DCONSTANT_VALUE=" + string_from_pixel_value(constant_value, data_type));
+ build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+ build_opts.add_option_if(multi_access_x && remainder_x,
+ "-DLAST_ACCESSED_X=" + support::cpp11::to_string(
+ std::max<int>(output_width_x - vec_size_x, 0)));
+
+ _kernel =
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("memset", build_opts.options()));
+}
+
+Status CLMemsetKernel::validate(const ITensorInfo *tensor, const PixelValue &constant_value,
+ Window *window)
+{
+ ARM_COMPUTE_UNUSED(tensor);
+ ARM_COMPUTE_UNUSED(constant_value);
+ if (window != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(window->x().step() != 1);
+ }
+ return Status{};
+}
+
+void CLMemsetKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+  // Collapse all the batches on the third dimension
+ Window collapsed = window.collapse_if_possible(_full_window, Window::DimZ);
+ Window slice = collapsed.first_slice_window_3D();
+
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _tensor, slice);
+ enqueue(queue, *this, slice, lws_hint());
+ } while (collapsed.slide_window_slice_3D(slice));
+}
+} // namespace arm_compute
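+
+// Minimal usage sketch, assuming an already allocated CLTensor `tensor`:
+//
+//   CLMemsetKernel memset_kernel;
+//   memset_kernel.configure(&tensor, PixelValue(0.f), nullptr); // fill the whole tensor with 0
+//   CLScheduler::get().enqueue(memset_kernel);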
#include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
#include <string>
namespace arm_compute
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h"
+
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_UNUSED(constant_value);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > input->num_dimensions());
+ if (mode == PaddingMode::REFLECT || mode == PaddingMode::SYMMETRIC)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > 3);
+
+ const auto is_reflect = static_cast<unsigned int>(mode == PaddingMode::REFLECT);
+ for (size_t i = 0; i < padding.size(); ++i)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(padding.at(i).first > (input->dimension(i) - is_reflect));
+ ARM_COMPUTE_RETURN_ERROR_ON(padding.at(i).second > (input->dimension(i) - is_reflect));
+ }
+ }
+
+ if (output->total_size() > 0)
+ {
+ TensorShape padded_shape =
+ misc::shape_calculator::compute_padded_shape(input->tensor_shape(), padding);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(output, input);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), padded_shape);
+ }
+
+ return Status{};
+}
+
+std::pair<Status, Window>
+validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PaddingList &padding,
+ PixelValue constant_value, PaddingMode mode,
+ unsigned int &num_elems_processed_per_iteration)
+{
+ ARM_COMPUTE_UNUSED(constant_value, mode);
+
+ const TensorShape padded_shape =
+ misc::shape_calculator::compute_padded_shape(input->tensor_shape(), padding);
+ auto_init_if_empty(*output, input->clone()->set_tensor_shape(padded_shape));
+
+ num_elems_processed_per_iteration =
+ std::min(16U, 32U / static_cast<unsigned int>(element_size_from_data_type(input->data_type())));
+ if (input->dimension(0) < num_elems_processed_per_iteration)
+ {
+ num_elems_processed_per_iteration =
+ 1 << static_cast<unsigned int>(std::log2(input->dimension(0)));
+ }
+
+ // Configure kernel window
+ Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
+
+ const int input_start_x =
+ mode == PaddingMode::CONSTANT ? -(padding.at(0).first % num_elems_processed_per_iteration) : 0;
+ const int input_start_y =
+ (mode == PaddingMode::CONSTANT && padding.size() > 1) ? -padding.at(1).first : 0;
+
+ AccessWindowRectangle input_access(input, input_start_x, input_start_y,
+ num_elems_processed_per_iteration, 1);
+ AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+
+ const bool window_changed = update_window_and_padding(win, input_access, output_access);
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_pair(err, win);
+}
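+
+/* Illustrative example, assuming an F32 input: num_elems_processed_per_iteration starts at
+ * min(16, 32 / 4) = 8; if the input width is only 5 elements, it is then clamped to the largest
+ * power of two not exceeding it, 1 << (unsigned)std::log2(5) = 4.
+ */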
+} // namespace
+
+CLPadLayerKernelEx::CLPadLayerKernelEx()
+ : _input(nullptr), _output(nullptr), _input_start_x(0), _input_start_y(0), _4d_enabled(false)
+{
+}
+
+void CLPadLayerKernelEx::configure(const ICLTensor *input, ICLTensor *output,
+ const PaddingList &padding, PixelValue constant_value,
+ PaddingMode mode)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value,
+ mode);
+}
+
+void CLPadLayerKernelEx::configure(const CLCompileContext &compile_context, const ICLTensor *input,
+ ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value, PaddingMode mode)
+{
+ ARM_COMPUTE_UNUSED(compile_context);
+ // Perform validation step
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(input->info(), output->info(), padding, constant_value, mode));
+
+ _input = input;
+ _output = output;
+ _4d_enabled = (mode == PaddingMode::CONSTANT) && (padding.size() > 3);
+
+ // Configure window
+ unsigned int vec_size;
+ auto win_config = validate_and_configure_window(input->info(), output->info(), padding,
+ constant_value, mode, vec_size);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICLKernel::configure_internal(win_config.second);
+
+ // Set build options
+ std::string kernel_name = "pad_layer_";
+
+ const DataType &data_type = input->info()->data_type();
+ const unsigned int input_width = input->info()->dimension(0);
+ const unsigned int input_height = input->info()->dimension(1);
+ const unsigned int input_depth = input->info()->dimension(2);
+ const unsigned int pad_x_before = padding.at(0).first;
+ const unsigned int pad_y_before = padding.size() > 1 ? padding.at(1).first : 0;
+ const unsigned int pad_z_before = padding.size() > 2 ? padding.at(2).first : 0;
+ const unsigned int pad_right_start = input_width + pad_x_before;
+
+ _input_start_x = mode == PaddingMode::CONSTANT ? -(pad_x_before % vec_size) : 0;
+ _input_start_y = (mode == PaddingMode::CONSTANT && padding.size() > 1) ? -padding.at(1).first : 0;
+
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
+ build_opts.add_option("-DSELECT_DT=" + get_cl_select_type_from_data_type(data_type));
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size));
+ build_opts.add_option("-DPAD_X_BEFORE=" + support::cpp11::to_string(pad_x_before));
+ build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input_width));
+ if (padding.size() > 1)
+ {
+ build_opts.add_option("-DPAD_Y_BEFORE=" + support::cpp11::to_string(pad_y_before));
+ build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input_height));
+
+ if (padding.size() > 2)
+ {
+ build_opts.add_option("-DPAD_Z_BEFORE=" + support::cpp11::to_string(pad_z_before));
+ build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(input_depth));
+ }
+ }
+
+ switch (mode)
+ {
+ case PaddingMode::CONSTANT:
+ {
+ kernel_name += "constant";
+
+ build_opts.add_option("-DCONST_VAL=" + string_from_pixel_value(constant_value, data_type));
+ build_opts.add_option_if(pad_x_before >= vec_size,
+ "-DNUM_THREADS_TO_SKIP_X=" +
+ support::cpp11::to_string(pad_x_before / vec_size));
+
+ if (_4d_enabled)
+ {
+ build_opts.add_option("-DPAD_W_BEFORE=" + support::cpp11::to_string(padding.at(3).first));
+ build_opts.add_option("-DSRC_BATCH=" +
+ support::cpp11::to_string(input->info()->dimension(3)));
+ }
+
+ break;
+ }
+ case PaddingMode::SYMMETRIC:
+ case PaddingMode::REFLECT:
+ {
+ kernel_name += "symmetric_reflect";
+
+ const auto is_reflect = static_cast<unsigned int>(mode == PaddingMode::REFLECT);
+
+ const unsigned int pad_x_before_remainder = pad_x_before % vec_size;
+ const unsigned int pad_x_after_remainder = pad_right_start % vec_size;
+ const unsigned int after_pad_fact_x = (2 * input_width + pad_x_before) - is_reflect;
+ const unsigned int output_last_x =
+ ceil_to_multiple(pad_right_start + padding.at(0).second, vec_size);
+
+ build_opts.add_option("-DIS_REFLECT=" + support::cpp11::to_string(is_reflect));
+ build_opts.add_option("-DPAD_X_BEFORE_REMAINDER=" +
+ support::cpp11::to_string(pad_x_before_remainder));
+ build_opts.add_option("-DPAD_X_AFTER_REMAINDER=" +
+ support::cpp11::to_string(pad_x_after_remainder));
+ build_opts.add_option(
+ "-DPAD_X_BEFORE_REMAINDER_REFL=" +
+ support::cpp11::to_string((pad_x_before_remainder + is_reflect) % vec_size));
+ build_opts.add_option(
+ "-DPAD_X_AFTER_REMAINDER_REFL=" +
+ support::cpp11::to_string((pad_x_after_remainder - is_reflect) % vec_size));
+ build_opts.add_option("-DAFTER_PAD_FACT_X=" + support::cpp11::to_string(after_pad_fact_x));
+ build_opts.add_option_if(after_pad_fact_x < output_last_x,
+ "-DAFTER_PAD_REM=" +
+ support::cpp11::to_string(after_pad_fact_x % vec_size));
+
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Padding mode not supported.");
+ }
+
+ // Create kernel
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
+}
+
+Status CLPadLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value,
+ PaddingMode mode)
+{
+ unsigned int vec_size;
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, padding, constant_value, mode));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
+ output->clone().get(), padding,
+ constant_value, mode, vec_size)
+ .first);
+
+ return Status{};
+}
+
+void CLPadLayerKernelEx::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ Window win_in = window;
+ win_in.adjust(Window::DimX, _input_start_x, true);
+ win_in.adjust(Window::DimY, _input_start_y, true);
+
+ Window slice_out = window.first_slice_window_3D();
+ Window slice_in = win_in.first_slice_window_3D();
+ unsigned int batch = 0;
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _input, slice_in);
+ add_3D_tensor_argument(idx, _output, slice_out);
+ if (_4d_enabled)
+ {
+ add_argument<unsigned int>(idx, batch++);
+ }
+
+ enqueue(queue, *this, slice_out, lws_hint());
+ } while (window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in));
+}
+} // namespace arm_compute
#include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+
+#include "src/core/CL/CLValidate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
namespace arm_compute
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
+#include "src/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
#include <climits>
#include <algorithm>
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
+#include "src/core/NEON/NEAsymm.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
#include "arm_compute/core/NEON/NEElementwiseOperationFuncs.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/wrapper/wrapper.h"
#include <algorithm>
#include <arm_neon.h>
std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)> configure_func(
const ITensor *input1, const ITensor *input2, ITensor *output,
- std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function)
+ std::map<std::string, cpu::kernels::CpuElementwiseKernel::ElementwiseFunction *> map_function)
{
std::string function_to_call("op_");
function_to_call += string_from_data_type(input1->info()->data_type()) + "_";
std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)>
configure_logic_func(const ITensor *input1, const ITensor *input2, ITensor *output)
{
- static std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function = {
- {"op_U8_U8_U8", &elementwise_logic_op<op, uint8_t, uint8x16_t>},
- {"op_QASYMM8_QASYMM8_QASYMM8", &elementwise_logic_op<op, uint8_t, uint8x16_t>}};
+ static std::map<std::string, cpu::kernels::CpuElementwiseKernel::ElementwiseFunction *>
+ map_function = {{"op_U8_U8_U8", &elementwise_logic_op<op, uint8_t, uint8x16_t>},
+ {"op_QASYMM8_QASYMM8_QASYMM8", &elementwise_logic_op<op, uint8_t, uint8x16_t>}};
return configure_func(input1, input2, output, map_function);
}
const ITensor *input2, ITensor *output)
{
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info()));
- configure_common(input1, input2, output);
+ configure_common(input1->info(), input2->info(), output->info());
switch (op)
{
case BinaryLogicalOperation::AND:
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output));
return Status{};
}
-
} // namespace arm_compute
*/
#include "arm_compute/core/NEON/kernels/NECastBoolKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/NEMath.h"
+#include "src/core/NEON/NEMath.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/SaturateCast.h"
+#include "support/SaturateCast.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
+#include "src/core/NEON/INEKernel.h"
using namespace arm_compute;
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
using namespace arm_compute;
NEEmbeddingLookupKernel::NEEmbeddingLookupKernel()
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/NEFixedPoint.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+#include <arm_neon.h>
+#include <cstddef>
+#include <cstdint>
+#include <mutex>
+
+using namespace arm_compute;
+
+namespace
+{
+inline Status validate_arguments(const ITensorInfo *accum, const ITensorInfo *biases)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(accum);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != accum->dimension(0));
+
+ return Status{};
+}
+
+inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *accum,
+ ITensorInfo *biases)
+{
+ constexpr unsigned int num_elems_processed_per_iteration = 16;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*accum, Steps(num_elems_processed_per_iteration));
+
+ bool window_changed = update_window_and_padding(
+ win, AccessWindowHorizontal(accum, 0, num_elems_processed_per_iteration),
+ AccessWindowStatic(biases, 0, 0,
+ ceil_to_multiple(biases->dimension(0), num_elems_processed_per_iteration),
+ biases->tensor_shape().y()));
+
+ AccessWindowHorizontal output_access(accum, 0, num_elems_processed_per_iteration);
+
+ // Set the valid region for the accum tensor
+ Coordinates coord;
+ coord.set_num_dimensions(accum->num_dimensions());
+ output_access.set_valid_region(win, ValidRegion(coord, accum->tensor_shape()));
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_pair(err, win);
+}
+} // namespace
+
+NEGEMMMatrixAccumulateBiasesKernel::NEGEMMMatrixAccumulateBiasesKernel()
+ : _accum(nullptr), _biases(nullptr)
+{
+}
+
+void NEGEMMMatrixAccumulateBiasesKernel::configure(ITensor *accum, const ITensor *biases)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(accum, biases);
+
+ // Perform validate step
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(accum->info(), biases->info()));
+
+ _biases = biases;
+ _accum = accum;
+
+ // Configure kernel window
+ auto win_config = validate_and_configure_window(accum->info(), biases->info());
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ INEKernel::configure(win_config.second);
+}
+
+Status NEGEMMMatrixAccumulateBiasesKernel::validate(const ITensorInfo *accum,
+ const ITensorInfo *biases)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(accum, biases));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(accum->clone().get(), biases->clone().get()).first);
+
+ return Status{};
+}
+
+std::mutex m;
+void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadInfo &info)
+{
+ std::lock_guard<std::mutex> lock_guard(m);
+ ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+
+ Window win_biases;
+ win_biases.set(Window::DimX,
+ Window::Dimension(window.x().start(), window.x().end(), window.x().step()));
+ win_biases.set(Window::DimY, Window::Dimension(0, 1, 1));
+
+ Iterator in0_out(_accum, window);
+ Iterator in1(_biases, win_biases);
+
+ switch (_accum->info()->data_type())
+ {
+ case DataType::F32:
+ {
+ execute_window_loop(
+ window,
+ [&](const Coordinates &) {
+ const float32x4x4_t accum = vld4q_f32(reinterpret_cast<const float *>(in0_out.ptr()));
+ const float32x4x4_t biases = vld4q_f32(reinterpret_cast<const float *>(in1.ptr()));
+ const float32x4x4_t res = {
+ {vaddq_f32(accum.val[0], biases.val[0]), vaddq_f32(accum.val[1], biases.val[1]),
+ vaddq_f32(accum.val[2], biases.val[2]), vaddq_f32(accum.val[3], biases.val[3])}};
+
+ vst4q_f32(reinterpret_cast<float *>(in0_out.ptr()), res);
+ },
+ in0_out, in1);
+ break;
+ }
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ case DataType::F16:
+ {
+ execute_window_loop(
+ window,
+ [&](const Coordinates &) {
+ const float16x8x2_t accum = vld2q_f16(reinterpret_cast<const float16_t *>(in0_out.ptr()));
+ const float16x8x2_t biases = vld2q_f16(reinterpret_cast<const float16_t *>(in1.ptr()));
+ const float16x8x2_t res = {
+ {vaddq_f16(accum.val[0], biases.val[0]), vaddq_f16(accum.val[1], biases.val[1])}};
+
+ vst2q_f16(reinterpret_cast<float16_t *>(in0_out.ptr()), res);
+ },
+ in0_out, in1);
+ break;
+ }
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ default:
+ ARM_COMPUTE_ERROR("Data type not supported");
+ break;
+ }
+}
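// Usage sketch (illustrative only, not part of the patch): how the kernel above is
// typically wired up with the standard ACL Tensor / NEScheduler APIs. Shapes, types
// and the assumed includes ("arm_compute/runtime/Tensor.h",
// "arm_compute/runtime/NEON/NEScheduler.h") are assumptions, not part of this change.
static void example_accumulate_biases_neon()
{
  Tensor accum, biases;
  accum.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
  biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
  accum.allocator()->allocate();
  biases.allocator()->allocate();

  NEGEMMMatrixAccumulateBiasesKernel kernel;
  ARM_COMPUTE_ERROR_THROW_ON(
    NEGEMMMatrixAccumulateBiasesKernel::validate(accum.info(), biases.info()));
  kernel.configure(&accum, &biases);
  // Split along Y, the same pattern used elsewhere in this patch for NEON kernels.
  NEScheduler::get().schedule(&kernel, Window::DimY);
}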
#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
namespace arm_compute
{
namespace
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include <unordered_map>
using namespace arm_compute;
#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/NEMath.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
+#include "src/core/NEON/NEMath.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/NEON/wrapper/wrapper.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include <arm_neon.h>
namespace arm_compute
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/INEKernel.h"
+#include "src/core/NEON/NEAsymm.h"
+#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
* SOFTWARE.
*/
#include "arm_compute/core/NEON/kernels/NEOneHotKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
namespace arm_compute
{
namespace
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
+#include "src/core/NEON/NEAsymm.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
+
+#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/Utils.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/runtime/Utils.h"
namespace arm_compute
{
"Reduction axis greater than max number of dimensions");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
const unsigned int num_of_stages =
- calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
+ utils::calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
DataType output_data_type = DataType::S32;
TensorInfo not_reshaped_output;
ARM_COMPUTE_RETURN_ON_ERROR(CLArgMinMaxLayerKernelEx::validate(
    input, &sums_vector[last_stage - 1], &not_reshaped_output, axis, op));
}
- ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayerKernel::validate(&not_reshaped_output, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayer::validate(&not_reshaped_output, output));
return Status{};
}
const ReductionOperation &op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- _num_of_stages = calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
+ _num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
_reduction_axis = axis;
const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(
&_not_reshaped_output, axis, op);
_results_vector[last_stage - 1].allocator()->allocate();
}
- _reshape_kernel.configure(&_not_reshaped_output, output);
+ _reshape_kernel.configure(CLKernelLibrary::get().get_compile_context(), &_not_reshaped_output,
+ output);
_not_reshaped_output.allocator()->allocate();
}
{
CLScheduler::get().enqueue(_reduction_kernels_vector[i], false);
}
- CLScheduler::get().enqueue(_reshape_kernel, false);
+ _reshape_kernel.run();
}
} // namespace arm_compute
#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
using namespace arm_compute;
void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
BinaryLogicalOperation op)
{
- auto k = support::cpp14::make_unique<CLBinaryLogicalOpKernel>();
+ auto k = std::make_unique<CLBinaryLogicalOpKernel>();
k->configure(input1, input2, output, op);
_kernel = std::move(k);
ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
if (broadcasted_info->info()->dimension(0) == 1)
{
- _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ _border_handler->configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
}
}
}
void CLCastBool::configure(ICLTensor *input, ICLTensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<CLCastBoolKernel>();
+ auto k = std::make_unique<CLCastBoolKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include <memory>
#include <tuple>
*/
#include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h"
-
#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h"
using namespace arm_compute;
void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output,
const ICLTensor *lookups)
{
- auto k = support::cpp14::make_unique<CLEmbeddingLookupKernel>();
+ auto k = std::make_unique<CLEmbeddingLookupKernel>();
k->configure(input, output, lookups);
_kernel = std::move(k);
}
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/MemorySupport.h"
#include <algorithm>
void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output)
{
- auto k = support::cpp14::make_unique<CLTransposeKernel>();
+ auto k = std::make_unique<CLTransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/MemorySupport.h"
+
+#include "support/Cast.h"
#include <algorithm>
void CLFullyConnectedLayerReshapeWeightsEx::configure(const ICLTensor *input, ICLTensor *output)
{
- auto k = support::cpp14::make_unique<CLTransposeKernel>();
+ auto k = std::make_unique<CLTransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
#include <arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h>
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "support/StringSupport.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+using namespace arm_compute;
+
+namespace
+{
+Status validate_arguments(const ITensorInfo *accum, const ITensorInfo *biases)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(accum);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() != 1);
+
+ return Status{};
+}
+
+std::pair<Status, Window>
+validate_and_configure_window(ITensorInfo *accum, ITensorInfo *biases, GPUTarget gpu_target,
+ unsigned int &num_elems_processed_per_iteration)
+{
+ // Select the vector size to use (8 for Bifrost; 16 for Midgard).
+ bool is_gpu_bifrost =
+ gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G76, GPUTarget::G51,
+ GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::G52, GPUTarget::G52LIT);
+ num_elems_processed_per_iteration = is_gpu_bifrost ? 8 : 16;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*accum, Steps(num_elems_processed_per_iteration));
+
+ AccessWindowStatic biases_access(
+ biases, 0, 0, ceil_to_multiple(biases->dimension(0), num_elems_processed_per_iteration),
+ biases->dimension(1));
+ AccessWindowHorizontal accum_access(accum, 0, num_elems_processed_per_iteration);
+
+ bool window_changed = update_window_and_padding(win, biases_access, accum_access);
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_pair(err, win);
+}
+} // namespace
+
+CLGEMMMatrixAccumulateBiasesKernel::CLGEMMMatrixAccumulateBiasesKernel()
+ : _accum(nullptr), _biases(nullptr)
+{
+}
+
+void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTensor *biases)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), accum, biases);
+}
+
+void CLGEMMMatrixAccumulateBiasesKernel::configure(const CLCompileContext &compile_context,
+ ICLTensor *accum, const ICLTensor *biases)
+{
+ ARM_COMPUTE_UNUSED(compile_context);
+ // Perform validate step
+ ARM_COMPUTE_ERROR_ON_NULLPTR(accum, biases);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(accum->info(), biases->info()));
+
+ _biases = biases;
+ _accum = accum;
+
+ // Get the target gpu
+ GPUTarget gpu_target = get_target();
+ unsigned int vector_size = 0;
+
+ // Configure kernel window
+ auto win_config =
+ validate_and_configure_window(accum->info(), biases->info(), gpu_target, vector_size);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICLKernel::configure_internal(win_config.second);
+
+ // Add build options
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(accum->info()->data_type()));
+ build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size));
+
+ // Create kernel
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel("gemm_accumulate_biases", build_opts.options()));
+}
+
+Status CLGEMMMatrixAccumulateBiasesKernel::validate(const ITensorInfo *accum,
+ const ITensorInfo *biases, GPUTarget gpu_target)
+{
+ unsigned int num_elems_processed_per_iteration = 0;
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(accum, biases));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(accum->clone().get(),
+ biases->clone().get(), gpu_target,
+ num_elems_processed_per_iteration)
+ .first);
+
+ return Status{};
+}
+
+void CLGEMMMatrixAccumulateBiasesKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
+
+ Window accum_slice = window.first_slice_window_2D();
+
+ Window biases_slice(accum_slice);
+ biases_slice.set(Window::DimY, Window::Dimension(0, 1, 1));
+
+ // Run kernel
+ do
+ {
+ // Set arguments
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _accum, accum_slice);
+ add_1D_tensor_argument(idx, _biases, biases_slice);
+
+ enqueue(queue, *this, accum_slice, lws_hint());
+ } while (window.slide_window_slice_2D(accum_slice));
+}
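// Usage sketch (illustrative only, not part of the patch): the CL variant is driven
// through CLScheduler. An initialized CL context and the assumed includes
// ("arm_compute/runtime/CL/CLTensor.h", "arm_compute/runtime/CL/CLScheduler.h") are
// assumptions; shapes and types are examples only.
static void example_accumulate_biases_cl()
{
  CLScheduler::get().default_init();

  CLTensor accum, biases;
  accum.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
  biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
  accum.allocator()->allocate();
  biases.allocator()->allocate();

  CLGEMMMatrixAccumulateBiasesKernel kernel;
  kernel.configure(&accum, &biases);  // picks the vector size from the GPU target
  CLScheduler::get().enqueue(kernel); // non-blocking enqueue
  CLScheduler::get().sync();
}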
#include "arm_compute/runtime/CL/functions/CLGatherEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/CL/kernels/CLGatherKernel.h"
+
#include "arm_compute/core/CL/kernels/CLGatherExKernel.h"
using namespace arm_compute;
void CLGatherEx::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output,
int axis)
{
- auto k = support::cpp14::make_unique<CLGatherExKernel>();
+ auto k = std::make_unique<CLGatherExKernel>();
k->configure(input, indices, output, axis);
_kernel = std::move(k);
}
void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys,
const ICLTensor *input, ICLTensor *output, ICLTensor *hits)
{
- auto k = support::cpp14::make_unique<CLHashtableLookupKernel>();
+ auto k = std::make_unique<CLHashtableLookupKernel>();
k->configure(lookups, keys, input, output, hits);
_kernel = std::move(k);
}
void CLInstanceNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output,
ICLTensor *gamma, ICLTensor *beta, float epsilon)
{
- auto k = support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>();
+ auto k = std::make_unique<CLInstanceNormalizationLayerKernelEx>();
k->configure(input, output, gamma, beta, epsilon);
_kernel = std::move(k);
}
void CLNeg::configure(ICLTensor *input, ICLTensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<CLNegKernel>();
+ auto k = std::make_unique<CLNegKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/kernels/CLOneHotKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/MemorySupport.h"
+
namespace arm_compute
{
CLOneHot::CLOneHot() : _memset_kernel(), _onehot_kernel(), _has_to_memset(false) {}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h"
+#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h"
+
+namespace arm_compute
+{
+CLPadLayerEx::CLPadLayerEx()
+ : _pad_kernel(std::make_unique<CLPadLayerKernelEx>()),
+ _copy_kernel(std::make_unique<opencl::kernels::ClCopyKernel>()), _perform_pad(false)
+{
+}
+
+void CLPadLayerEx::configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value, PaddingMode mode)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value,
+ mode);
+}
+
+void CLPadLayerEx::configure(const CLCompileContext &compile_context, ICLTensor *input,
+ ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value, PaddingMode mode)
+{
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate(input->info(), output->info(), padding, constant_value, mode));
+
+ _perform_pad = std::any_of(padding.begin(), padding.end(),
+ [](PaddingInfo info) { return info.first > 0 || info.second > 0; });
+
+ if (_perform_pad)
+ {
+ _pad_kernel->configure(compile_context, input, output, padding, constant_value, mode);
+ }
+ else
+ {
+ Window copy_window = Window();
+ copy_window.use_tensor_dimensions(output->info()->tensor_shape());
+ // Copy the input to the whole output if no padding is applied
+ _copy_kernel->configure(compile_context, input->info(), output->info(), &copy_window);
+ }
+}
+Status CLPadLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value,
+ PaddingMode mode)
+{
+ bool perform_pad = std::any_of(padding.begin(), padding.end(), [](PaddingInfo info) {
+ return info.first > 0 || info.second > 0;
+ });
+
+ if (perform_pad)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLPadLayerKernelEx::validate(input, output, padding, constant_value, mode));
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClCopyKernel::validate(input, output));
+ }
+ return Status{};
+}
+void CLPadLayerEx::run()
+{
+ if (_perform_pad)
+ {
+ CLScheduler::get().enqueue(*_pad_kernel);
+ }
+ else
+ {
+ CLScheduler::get().enqueue(*_copy_kernel);
+ }
+}
+} // namespace arm_compute
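// Usage sketch (illustrative only, not part of the patch): pad a CL tensor by one
// element on each side of X and two on each side of Y with a constant zero. The
// tensors are assumed to be configured and allocated by the caller.
static void example_pad_layer_ex(arm_compute::ICLTensor *input, arm_compute::ICLTensor *output)
{
  using namespace arm_compute;
  CLPadLayerEx pad;
  pad.configure(input, output, PaddingList{{1, 1}, {2, 2}}, PixelValue(0.f),
                PaddingMode::CONSTANT);
  pad.run(); // enqueues either the pad kernel or the plain copy, as selected in configure()
}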
ARM_COMPUTE_RETURN_ERROR_ON(num_of_kernels < 1);
// Create temporary tensor infos
- auto interm_tensors = support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
+ auto interm_tensors = std::make_unique<TensorInfo[]>(num_of_interm_tensors);
// Create intermediate tensor info
TensorShape shape{input->tensor_shape()};
throw std::runtime_error("CLReduceOperation: there is no axis to reduce");
}
- _interm_tensors = support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
- _reduce_kernels = support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
+ _interm_tensors = std::make_unique<CLTensor[]>(num_of_interm_tensors);
+ _reduce_kernels = std::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
// Set a vector that is ordered ICLTensors sequentially.
std::vector<ICLTensor *> tensors;
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include <cassert>
using namespace arm_compute;
{
case DeconvolutionMethod::DIRECT:
{
- auto f = arm_compute::support::cpp14::make_unique<CLDirectTransposeConvLayer>();
+ auto f = std::make_unique<CLDirectTransposeConvLayer>();
f->configure(compile_context, input, weights, bias, output, deconv_info, invalid_right,
invalid_bottom, weights_info);
_function = std::move(f);
}
case DeconvolutionMethod::GEMM:
{
- auto f = arm_compute::support::cpp14::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
+ auto f = std::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
f->configure(compile_context, input, weights, bias, output, deconv_info);
_function = std::move(f);
break;
* SOFTWARE.
*/
-#include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h"
#include <arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h>
+#include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h"
#include "arm_compute/core/ITensor.h"
-#include "support/MemorySupport.h"
#include <utility>
void NEBinaryLogicalOperationStatic<COP>::configure(ITensor *input1, ITensor *input2,
ITensor *output)
{
- auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
+ auto k = std::make_unique<NEBinaryLogicalOperationKernel>();
k->configure(COP, input1, input2, output);
_kernel = std::move(k);
}
void NEBinaryLogicalOperation::configure(ITensor *input1, ITensor *input2, ITensor *output,
BinaryLogicalOperation op)
{
- auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
+ auto k = std::make_unique<NEBinaryLogicalOperationKernel>();
k->configure(op, input1, input2, output);
_kernel = std::move(k);
}
#include "arm_compute/runtime/NEON/functions/NECastBool.h"
#include "arm_compute/core/NEON/kernels/NECastBoolKernel.h"
-#include "support/MemorySupport.h"
using namespace arm_compute;
void NECastBool::configure(const ITensor *input, ITensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<NECastBoolKernel>();
+ auto k = std::make_unique<NECastBoolKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
#include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h"
#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h"
-#include "support/MemorySupport.h"
using namespace arm_compute;
void NEEmbeddingLookup::configure(const ITensor *input, ITensor *output, const ITensor *lookups)
{
- auto k = support::cpp14::make_unique<NEEmbeddingLookupKernel>();
+ auto k = std::make_unique<NEEmbeddingLookupKernel>();
k->configure(input, output, lookups);
_kernel = std::move(k);
}
void NEFullyConnectedHybridLayerReshapeWeights::configure(const ITensor *input, ITensor *output)
{
- auto k = support::cpp14::make_unique<NETransposeKernel>();
+ auto k = std::make_unique<NETransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
#include <algorithm>
#include <cmath>
-using namespace arm_compute;
+namespace arm_compute
+{
using namespace arm_compute::misc::shape_calculator;
namespace
const ITensor *biases, ITensor *output,
FullyConnectedLayerInfo fc_info)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
-
// Perform validate step
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayerEx::validate(
input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
fc_info));
(input->dimension(0) * input->dimension(1) * input->dimension(2))));
// Validate flatten kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &flatten_input));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayer::validate(input, &flatten_input));
input_to_use = &flatten_input;
}
else
if (!_is_prepared)
{
if (!_are_weights_reshaped)
+ {
_reshape_weights_output.allocator()->allocate();
+ }
if (!_are_weights_converted)
+ {
_converted_weights_output.allocator()->allocate();
+ }
_is_prepared = true;
}
// Linearize input if it comes from a convolutional layer
if (_is_fc_after_conv)
{
- NEScheduler::get().schedule(&_flatten_kernel, Window::DimY);
+ _flatten_kernel.run();
}
// Run matrix multiply
}
#endif
}
+} // namespace arm_compute
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h>
+#include "src/core/helpers/AutoConfiguration.h"
+#include <cassert>
using namespace arm_compute;
#include "arm_compute/runtime/NEON/functions/NEGatherEx.h"
#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
-#include "support/MemorySupport.h"
#include <utility>
{
void NEGatherEx::configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis)
{
- auto k = support::cpp14::make_unique<NEGatherKernelEx>();
+ auto k = std::make_unique<NEGatherKernelEx>();
k->configure(input, indices, output, axis);
_kernel = std::move(k);
}
#include "arm_compute/runtime/NEON/functions/NEHashtableLookup.h"
#include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h"
-#include "support/MemorySupport.h"
using namespace arm_compute;
void NEHashtableLookup::configure(const ITensor *lookups, const ITensor *keys, const ITensor *input,
ITensor *output, ITensor *hits)
{
- auto k = support::cpp14::make_unique<NEHashtableLookupKernel>();
+ auto k = std::make_unique<NEHashtableLookupKernel>();
k->configure(lookups, keys, input, output, hits);
_kernel = std::move(k);
}
*/
#include "arm_compute/runtime/NEON/functions/NEOneHot.h"
#include "arm_compute/core/NEON/kernels/NEOneHotKernel.h"
-#include "support/MemorySupport.h"
+
#include <utility>
namespace arm_compute
{
void NEOneHot::configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
const ITensor *off_value, ITensor *output, int axis)
{
- auto k = arm_compute::support::cpp14::make_unique<NEOneHotKernel>();
+ auto k = std::make_unique<NEOneHotKernel>();
k->configure(indices, depth, on_value, off_value, output, axis);
_kernel = std::move(k);
}
#include "arm_compute/runtime/NEON/functions/NEReduceOperation.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/runtime/Tensor.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute;
#include "arm_compute/runtime/NEON/functions/NEReduceSum.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute;
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute::misc::shape_calculator;
int32_t zero_point;
float scale;
float *table;
+ uint8_t *uint8_table1;
+ uint8_t *uint8_table2;
};
struct PackParams
#include "Shape.h"
+#include "neon/neon_check.h"
+
#include <algorithm>
#include <cstdint>
#include <fixedpoint/fixedpoint.h>
namespace cker
{
+template <typename T> struct is_quant8
+{
+ static constexpr bool value = std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value;
+};
+
template <typename T>
inline T ActivationFunctionWithMinMax(T x, T output_activation_min, T output_activation_max)
{
gemmlowp::SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
}
+#ifdef USE_NEON
+inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(int32x4x4_t input_val,
+ int32_t quantized_multiplier, int32_t shift)
+{
+ const int left_shift = std::max(shift, 0);
+ const int right_shift = std::min(shift, 0);
+ int32x4x4_t result;
+
+ int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier);
+ int32x4_t left_shift_dup = vdupq_n_s32(left_shift);
+ int32x4_t right_shift_dup = vdupq_n_s32(right_shift);
+
+ result.val[0] = vrshlq_s32(
+ vqrdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup), multiplier_dup), right_shift_dup);
+
+ result.val[1] = vrshlq_s32(
+ vqrdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup), multiplier_dup), right_shift_dup);
+
+ result.val[2] = vrshlq_s32(
+ vqrdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup), multiplier_dup), right_shift_dup);
+
+ result.val[3] = vrshlq_s32(
+ vqrdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup), multiplier_dup), right_shift_dup);
+
+ return result;
+}
+#endif
+
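// Note (illustrative, descriptive only): per lane, the NEON helper above computes the
// same value as the scalar MultiplyByQuantizedMultiplier, i.e. approximately
//   result = input * (quantized_multiplier / 2^31) * 2^shift, rounded to nearest.
// For example, quantized_multiplier = 1 << 30 (0.5 in Q31) with shift = -1 maps an
// input of 100 to about 100 * 0.5 * 0.5 = 25 in every lane.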
inline int NodeOffset(int b, int h, int w, int height, int width)
{
return (b * height + h) * width + w;
}
}
+template <>
+void AveragePool<int8_t>(const PoolParams &params, const Shape &input_shape,
+ const int8_t *input_data, const Shape &output_shape, int8_t *output_data)
+{
+ // Here, and in other pooling ops, in order to maintain locality of reference,
+ // to minimize some recalculations, and to load into NEON vector registers, we
+ // use an inner loop down the depth. Since the depth can be large, the temporary
+ // storage needed could be arbitrarily large as well, so we divide the work up
+ // into depth tranches just within the batch loop.
+ static constexpr int kPoolingAccTrancheSize = 256;
+
+ assert(params.quantized_activation_min <= params.quantized_activation_max);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ const int stride_height = params.stride_height;
+ const int stride_width = params.stride_width;
+
+ int32_t acc[kPoolingAccTrancheSize];
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ // We proceed through the depth in tranches (see comment above). The
+ // depth_base is the depth at the beginning of the tranche. The
+ // tranche_depth is the depth dimension of the tranche.
+ for (int depth_base = 0; depth_base < depth; depth_base += kPoolingAccTrancheSize)
+ {
+ const int tranche_depth = std::min(depth - depth_base, kPoolingAccTrancheSize);
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
+ const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
+ const int filter_x_start = std::max(0, -in_x_origin);
+ const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
+ const int filter_y_start = std::max(0, -in_y_origin);
+ const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
+ const int filter_count =
+ (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
+ memset(acc, 0, tranche_depth * sizeof(acc[0]));
+ const int8_t *input_ptr =
+ input_data + depth_base +
+ depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
+ for (int fy = filter_y_start; fy < filter_y_end; fy++)
+ {
+ const int8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start);
+ for (int fx = filter_x_start; fx < filter_x_end; fx++)
+ {
+ const int8_t *input_channel_ptr = input_row_ptr;
+ int channel = 0;
+#ifdef USE_NEON
+ for (; channel <= tranche_depth - 16; channel += 16)
+ {
+ int16x4_t acc_reg[4];
+ int8x16_t input_reg = vld1q_s8(input_channel_ptr);
+ input_channel_ptr += 16;
+ acc_reg[0] = vget_low_s16(vmovl_s8(vget_low_s8(input_reg)));
+ acc_reg[1] = vget_high_s16(vmovl_s8(vget_low_s8(input_reg)));
+ acc_reg[2] = vget_low_s16(vmovl_s8(vget_high_s8(input_reg)));
+ acc_reg[3] = vget_high_s16(vmovl_s8(vget_high_s8(input_reg)));
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc + channel + 4 * i,
+ vaddw_s16(vld1q_s32(acc + channel + 4 * i), acc_reg[i]));
+ }
+ }
+ for (; channel <= tranche_depth - 8; channel += 8)
+ {
+ int16x4_t acc_reg[2];
+ int16x8_t input_reg = vmovl_s8(vld1_s8(input_channel_ptr));
+ input_channel_ptr += 8;
+ acc_reg[0] = vget_low_s16(input_reg);
+ acc_reg[1] = vget_high_s16(input_reg);
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc + channel + 4 * i,
+ vaddw_s16(vld1q_s32(acc + channel + 4 * i), acc_reg[i]));
+ }
+ }
+#endif
+ for (; channel < tranche_depth; ++channel)
+ {
+ acc[channel] += *input_channel_ptr++;
+ }
+ input_row_ptr += depth;
+ }
+ }
+ int8_t *output_ptr = output_data + Offset(output_shape, batch, out_y, out_x, depth_base);
+ int channel = 0;
+#ifdef USE_NEON
+ for (; channel <= tranche_depth - 8; channel += 8)
+ {
+ int16_t buf[8];
+ for (int i = 0; i < 8; i++)
+ {
+ buf[i] = acc[channel + i] > 0 ? (acc[channel + i] + filter_count / 2) / filter_count
+ : (acc[channel + i] - filter_count / 2) / filter_count;
+ }
+ int8x8_t buf8 = vqmovn_s16(vld1q_s16(buf));
+ buf8 = vmin_s8(buf8, vdup_n_s8(params.quantized_activation_max));
+ buf8 = vmax_s8(buf8, vdup_n_s8(params.quantized_activation_min));
+ vst1_s8(output_ptr + channel, buf8);
+ }
+#endif
+ for (; channel < tranche_depth; ++channel)
+ {
+ int16_t a = acc[channel] > 0 ? (acc[channel] + filter_count / 2) / filter_count
+ : (acc[channel] - filter_count / 2) / filter_count;
+ a = std::max<int16_t>(a, params.quantized_activation_min);
+ a = std::min<int16_t>(a, params.quantized_activation_max);
+ output_ptr[channel] = static_cast<int8_t>(a);
+ }
+ }
+ }
+ }
+ }
+}
+
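// Note (illustrative, descriptive only): the epilogue above divides each accumulated
// sum by the number of contributing input elements with rounding to nearest, e.g.
//   acc = 7,  filter_count = 4  ->  (7 + 2) / 4  =  2   (7/4 = 1.75 rounds to 2)
//   acc = -7, filter_count = 4  ->  (-7 - 2) / 4 = -2   (-1.75 rounds to -2)
// before clamping to [quantized_activation_min, quantized_activation_max].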
} // namespace cker
} // namespace nnfw
}
template <BinaryArithmeticOpType op_type, typename T>
-inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
- const T *input1_data, const Shape &input2_shape,
- const T *input2_data, const Shape &output_shape, T *output_data)
+inline typename std::enable_if_t<!is_quant8<T>::value>
+BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
reference::BinaryArithmeticOp(params, input1_shape, input1_data, input2_shape, input2_data,
output_shape, output_data, GetBinaryArtithmeticFn<op_type, T>());
}
-template <BinaryArithmeticOpType op_type>
-inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
- const uint8_t *input1_data, const Shape &input2_shape,
- const uint8_t *input2_data, const Shape &output_shape,
- uint8_t *output_data)
+template <BinaryArithmeticOpType op_type, typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
switch (op_type)
{
case nnfw::cker::BinaryArithmeticOpType::ADD:
case nnfw::cker::BinaryArithmeticOpType::SUB:
- optimized::AddQuant8(params, input1_shape, input1_data, input2_shape, input2_data,
- output_shape, output_data);
+ optimized::Add(params, input1_shape, input1_data, input2_shape, input2_data, output_shape,
+ output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::MUL:
- optimized::MulQuant8(params, input1_shape, const_cast<uint8_t *>(input1_data), input2_shape,
- const_cast<uint8_t *>(input2_data), output_shape, output_data);
+ optimized::Mul(params, input1_shape, input1_data, input2_shape, input2_data, output_shape,
+ output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::DIV:
throw std::runtime_error{"Quant8 Asymm NYI"};
-
default:
assert(false);
break;
}
template <BinaryArithmeticOpType op_type, typename T>
-inline void BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam &params, const Shape &input1_shape,
- const T *input1_data, const Shape &input2_shape,
- const T *input2_data, const Shape &output_shape,
- T *output_data)
+inline typename std::enable_if_t<!is_quant8<T>::value>
+BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data,
GetBinaryArtithmeticFn<op_type, T>());
}
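// Dispatch sketch (illustrative only): the std::enable_if overloads above and below are
// selected purely by the element type via the is_quant8 trait added earlier in this patch.
static_assert(is_quant8<uint8_t>::value, "uint8_t takes the optimized quantized path");
static_assert(is_quant8<int8_t>::value, "int8_t takes the optimized quantized path");
static_assert(!is_quant8<float>::value, "float takes the reference path");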
-template <BinaryArithmeticOpType op_type>
-inline void BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam &params, const Shape &input1_shape,
- const uint8_t *input1_data, const Shape &input2_shape,
- const uint8_t *input2_data, const Shape &output_shape,
- uint8_t *output_data)
+template <BinaryArithmeticOpType op_type, typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
switch (op_type)
{
case nnfw::cker::BinaryArithmeticOpType::ADD:
case nnfw::cker::BinaryArithmeticOpType::SUB:
- optimized::BroadcastAddDispatchQuant8(params, input1_shape, input1_data, input2_shape,
- input2_data, output_shape, output_data);
+ optimized::BroadcastAddDispatch(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::MUL:
- optimized::BroadcastMulDispatchQuant8(
- params, input1_shape, const_cast<uint8_t *>(input1_data), input2_shape,
- const_cast<uint8_t *>(input2_data), output_shape, output_data);
+ optimized::BroadcastMulDispatch(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::DIV:
case nnfw::cker::BinaryArithmeticOpType::POW:
}
}
};
-} // functor
+} // namespace functor
template <typename T>
inline void BroadcastTo(const Shape &input_shape, T *input_data, const Shape &output_shape,
}
}
+ void operator()(const ConvParams &params, const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape, int8_t *output_data)
+ {
+ reference::Conv(params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
+ input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+ }
+ std::vector<int32_t> &per_channel_output_multiplier() { return _per_channel_output_multiplier; }
+ std::vector<int> &per_channel_output_shift() { return _per_channel_output_shift; }
+
private:
bool usableMultiThreaded(PaddingType padding_type, uint32_t dilation_width_factor,
int32_t dilation_height_factor)
Shape _im2col_shape;
bool _need_im2col;
bool _prepared;
+ // Per channel output multiplier and shift.
+ std::vector<int32_t> _per_channel_output_multiplier;
+ std::vector<int> _per_channel_output_shift;
};
} // namespace cker
} // namespace nnfw
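// Usage note (illustrative, an assumption about the expected calling convention): before
// invoking the int8 operator() added above, the caller is expected to fill
// per_channel_output_multiplier()/per_channel_output_shift() with one Q31 multiplier and
// one shift per output channel (derived from input_scale * filter_scale[c] / output_scale),
// which the reference int8 Conv then consumes directly.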
#include "cker/neon/neon_check.h"
#include "cker/operation/optimized/DepthwiseConvFloat.h"
#include "cker/operation/optimized/DepthwiseConvUint8.h"
+#include "cker/operation/optimized/integer_ops/DepthwiseConvInt8.h"
#include "cker/CpuBackendThreadpool.h"
namespace nnfw
{
return Shape::ExtendedShape(shape.DimensionsCount(), shape);
}
-}
+} // namespace
class Einsum
{
{
namespace cker
{
-template <typename T> inline void Fill(const T value_data, const Shape &output_shape, T output_data)
+template <typename T>
+inline void Fill(const T *value_data, const Shape &output_shape, T *output_data)
{
int output_size = output_shape.FlatSize();
for (int i = 0; i < output_size; i++)
}
} // namespace random
-} // namespace tensorflow
-}
+} // namespace cker
+} // namespace nnfw
#endif // __NNFW_CKER_HELPER_RANDOM_DISTRIBUTIONS_H__
};
} // namespace functor
-} // namespace tensorflow
-}
+} // namespace cker
+} // namespace nnfw
#endif // __NNFW_CKER_HELPER_RANDOM_OP_H__
} // namespace functor
-} // end namespace tensorflow
-}
+} // namespace cker
+} // namespace nnfw
#endif // __NNFW_CKER_HELPER_RANDOM_OP_CPU_H__
/*
* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
-#include <stdexcept>
+#include <cassert>
#include <iostream>
+#include <stdexcept>
+
namespace nnfw
{
namespace cker
output_data[i] = clamped;
}
}
+
+inline void Quantize(const int32_t *multiplier, const int32_t *shift, int32_t channel_size,
+ int32_t total_size, int32_t output_zp, int32_t output_min, int32_t output_max,
+ int32_t *scratch, int8_t *output)
+{
+ // Here we're trying to quantize the raw accumulators:
+ // output_channels
+ // data data data data data
+ // rows data data data data data
+ // data data data data data
+ // ....
+ //
+ // To avoid reloading the multipliers & shifts, we load them once per block of
+ // channels and quantize the raw accumulators for every row in that block before
+ // moving on to the next block.
+#ifdef USE_NEON
+ const int32x4_t output_offset_vec = vdupq_n_s32(output_zp);
+ const int32x4_t output_activation_min_vec = vdupq_n_s32(output_min);
+ const int32x4_t output_activation_max_vec = vdupq_n_s32(output_max);
+ const int32x4_t zeros = vdupq_n_s32(0);
+#endif
+
+ assert(total_size % channel_size == 0);
+ const int32_t rows = total_size / channel_size;
+
+ int c = 0;
+
+#ifdef USE_NEON
+ using gemmlowp::RoundingDivideByPOT;
+ for (; c <= channel_size - 8; c += 8)
+ {
+ int32x4_t out_shift_1 = vld1q_s32(shift + c);
+ int32x4_t out_shift_2 = vld1q_s32(shift + c + 4);
+ int32x4_t left_shift_1 = vmaxq_s32(out_shift_1, zeros);
+ int32x4_t left_shift_2 = vmaxq_s32(out_shift_2, zeros);
+
+ // Right shift will be performed as left shift with negative values.
+ int32x4_t right_shift_1 = vminq_s32(out_shift_1, zeros);
+ int32x4_t right_shift_2 = vminq_s32(out_shift_2, zeros);
+
+ int32x4_t out_mul_1 = vld1q_s32(multiplier + c);
+ int32x4_t out_mul_2 = vld1q_s32(multiplier + c + 4);
+ for (int n = 0; n < rows; ++n)
+ {
+ int loc = n * channel_size + c;
+ int32x4_t acc_1 = vld1q_s32(scratch + loc);
+ int32x4_t acc_2 = vld1q_s32(scratch + loc + 4);
+
+ // Saturating Rounding Doubling High Mul.
+ acc_1 = vshlq_s32(acc_1, left_shift_1);
+ acc_1 = vqrdmulhq_s32(acc_1, out_mul_1);
+ acc_2 = vshlq_s32(acc_2, left_shift_2);
+ acc_2 = vqrdmulhq_s32(acc_2, out_mul_2);
+
+ // Rounding Dividing By POT.
+ acc_1 = vrshlq_s32(acc_1, right_shift_1);
+ acc_2 = vrshlq_s32(acc_2, right_shift_2);
+
+ // Add the output offset.
+ acc_1 = vaddq_s32(acc_1, output_offset_vec);
+ acc_2 = vaddq_s32(acc_2, output_offset_vec);
+
+ // Apply the activation function.
+ acc_1 = vmaxq_s32(acc_1, output_activation_min_vec);
+ acc_1 = vminq_s32(acc_1, output_activation_max_vec);
+ acc_2 = vmaxq_s32(acc_2, output_activation_min_vec);
+ acc_2 = vminq_s32(acc_2, output_activation_max_vec);
+
+ // Saturating cast to int8 and store to destination.
+ const int16x4_t acc_s16_1 = vqmovn_s32(acc_1);
+ const int16x4_t acc_s16_2 = vqmovn_s32(acc_2);
+ const int16x8_t res_s16 = vcombine_s16(acc_s16_1, acc_s16_2);
+ const int8x8_t res_s8 = vqmovn_s16(res_s16);
+ vst1_s8(output + loc, res_s8);
+ }
+ }
+
+#endif // USE_NEON
+ // Handle leftover values, one by one. This is very slow.
+ for (; c < channel_size; c++)
+ {
+ for (int n = 0; n < rows; ++n)
+ {
+ int loc = n * channel_size + c;
+ int32_t acc = scratch[loc];
+ acc = MultiplyByQuantizedMultiplier(acc, multiplier[c], shift[c]);
+ acc += output_zp;
+ acc = std::max(acc, output_min);
+ acc = std::min(acc, output_max);
+ output[loc] = static_cast<int8_t>(acc);
+ }
+ }
+}
+
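// Usage sketch (illustrative only; the function name, shapes and values are assumptions):
// quantize a 2-row x 4-channel int32 accumulator buffer to int8 with per-channel params.
inline void QuantizePerChannelExample()
{
  int32_t scratch[8] = {100, -50, 7, 0, 25, 13, -3, 90};         // rows x channels, row-major
  int32_t multiplier[4] = {1 << 30, 1 << 30, 1 << 30, 1 << 30};  // ~0.5 in Q31
  int32_t shift[4] = {0, 0, 0, 0};
  int8_t output[8];
  // e.g. scratch[0] = 100 becomes roughly 100 * 0.5 = 50 before clamping to [-128, 127]
  Quantize(multiplier, shift, /*channel_size=*/4, /*total_size=*/8,
           /*output_zp=*/0, /*output_min=*/-128, /*output_max=*/127, scratch, output);
}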
+template <typename input_type, typename output_type>
+inline void Requantize(const input_type *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zeropoint, int32_t output_zeropoint, output_type *output_data)
+{
+ assert(!"Requantize: not supported type. It shouldn't reach here.");
+ UNUSED_ALL(input_data, size, effective_scale_multiplier, effective_scale_shift, input_zeropoint,
+ output_zeropoint, output_data);
+}
+
+template <>
+inline void Requantize<uint8_t, int8_t>(const uint8_t *input_data, int32_t size,
+ int32_t effective_scale_multiplier,
+ int32_t effective_scale_shift, int32_t input_zeropoint,
+ int32_t output_zeropoint, int8_t *output_data)
+{
+ static constexpr int32_t kMinOutput = std::numeric_limits<int8_t>::min();
+ static constexpr int32_t kMaxOutput = std::numeric_limits<int8_t>::max();
+
+ int i = 0;
+#ifdef USE_NEON
+ // Constants.
+ const int32x4_t input_zero_point_dup = vdupq_n_s32(-input_zeropoint);
+ const int32x4_t output_zero_point_dup = vdupq_n_s32(output_zeropoint);
+ const int32x4_t min_val_dup = vdupq_n_s32(kMinOutput);
+ const int32x4_t max_val_dup = vdupq_n_s32(kMaxOutput);
+
+ for (; i <= size - 16; i += 16)
+ {
+ const uint8x16_t input_vec = vld1q_u8(input_data + i);
+ const uint16x8_t first_half = vmovl_u8(vget_low_u8(input_vec));
+ const uint16x8_t second_half = vmovl_u8(vget_high_u8(input_vec));
+ int32x4x4_t input;
+ input.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(first_half)));
+ input.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(first_half)));
+ input.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(second_half)));
+ input.val[3] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(second_half)));
+ input.val[0] = vaddq_s32(input.val[0], input_zero_point_dup);
+ input.val[1] = vaddq_s32(input.val[1], input_zero_point_dup);
+ input.val[2] = vaddq_s32(input.val[2], input_zero_point_dup);
+ input.val[3] = vaddq_s32(input.val[3], input_zero_point_dup);
+
+ int32x4x4_t result =
+ MultiplyByQuantizedMultiplier4Rows(input, effective_scale_multiplier, effective_scale_shift);
+
+ result.val[0] = vaddq_s32(result.val[0], output_zero_point_dup);
+ result.val[1] = vaddq_s32(result.val[1], output_zero_point_dup);
+ result.val[2] = vaddq_s32(result.val[2], output_zero_point_dup);
+ result.val[3] = vaddq_s32(result.val[3], output_zero_point_dup);
+ result.val[0] = vmaxq_s32(vminq_s32(result.val[0], max_val_dup), min_val_dup);
+ result.val[1] = vmaxq_s32(vminq_s32(result.val[1], max_val_dup), min_val_dup);
+ result.val[2] = vmaxq_s32(vminq_s32(result.val[2], max_val_dup), min_val_dup);
+ result.val[3] = vmaxq_s32(vminq_s32(result.val[3], max_val_dup), min_val_dup);
+
+ const int16x4_t narrowed_val_1 = vqmovn_s32(result.val[0]);
+ const int16x4_t narrowed_val_2 = vqmovn_s32(result.val[1]);
+ const int16x4_t narrowed_val_3 = vqmovn_s32(result.val[2]);
+ const int16x4_t narrowed_val_4 = vqmovn_s32(result.val[3]);
+ const int16x8_t output_first_half = vcombine_s16(narrowed_val_1, narrowed_val_2);
+ const int16x8_t output_second_half = vcombine_s16(narrowed_val_3, narrowed_val_4);
+ const int8x8_t narrowed_first_half = vqmovn_s16(output_first_half);
+ const int8x8_t narrowed_second_half = vqmovn_s16(output_second_half);
+ const int8x16_t narrowed_result = vcombine_s8(narrowed_first_half, narrowed_second_half);
+ vst1q_s8(output_data + i, narrowed_result);
+ }
+
+#endif
+ for (; i < size; ++i)
+ {
+ const int32_t input = input_data[i] - input_zeropoint;
+ const int32_t output =
+ MultiplyByQuantizedMultiplier(input, effective_scale_multiplier, effective_scale_shift) +
+ output_zeropoint;
+ const int32_t clamped_output = std::max(std::min(output, kMaxOutput), kMinOutput);
+ output_data[i] = static_cast<int8_t>(clamped_output);
+ }
+}
+
+template <>
+inline void Requantize<int8_t, uint8_t>(const int8_t *input_data, int32_t size,
+ int32_t effective_scale_multiplier,
+ int32_t effective_scale_shift, int32_t input_zeropoint,
+ int32_t output_zeropoint, uint8_t *output_data)
+{
+ static constexpr int32_t kMinOutput = std::numeric_limits<uint8_t>::min();
+ static constexpr int32_t kMaxOutput = std::numeric_limits<uint8_t>::max();
+
+ int i = 0;
+#ifdef USE_NEON
+ // Constants.
+ const int32x4_t input_zero_point_dup = vdupq_n_s32(-input_zeropoint);
+ const int32x4_t output_zero_point_dup = vdupq_n_s32(output_zeropoint);
+ const int32x4_t min_val_dup = vdupq_n_s32(kMinOutput);
+ const int32x4_t max_val_dup = vdupq_n_s32(kMaxOutput);
+
+ for (; i <= size - 16; i += 16)
+ {
+ const int8x16_t input_vec = vld1q_s8(input_data + i);
+ const int16x8_t first_half = vmovl_s8(vget_low_s8(input_vec));
+ const int16x8_t second_half = vmovl_s8(vget_high_s8(input_vec));
+ int32x4x4_t input;
+ input.val[0] = vmovl_s16(vget_low_s16(first_half));
+ input.val[1] = vmovl_s16(vget_high_s16(first_half));
+ input.val[2] = vmovl_s16(vget_low_s16(second_half));
+ input.val[3] = vmovl_s16(vget_high_s16(second_half));
+ input.val[0] = vaddq_s32(input.val[0], input_zero_point_dup);
+ input.val[1] = vaddq_s32(input.val[1], input_zero_point_dup);
+ input.val[2] = vaddq_s32(input.val[2], input_zero_point_dup);
+ input.val[3] = vaddq_s32(input.val[3], input_zero_point_dup);
+
+ int32x4x4_t result =
+ MultiplyByQuantizedMultiplier4Rows(input, effective_scale_multiplier, effective_scale_shift);
+
+ result.val[0] = vaddq_s32(result.val[0], output_zero_point_dup);
+ result.val[1] = vaddq_s32(result.val[1], output_zero_point_dup);
+ result.val[2] = vaddq_s32(result.val[2], output_zero_point_dup);
+ result.val[3] = vaddq_s32(result.val[3], output_zero_point_dup);
+ result.val[0] = vmaxq_s32(vminq_s32(result.val[0], max_val_dup), min_val_dup);
+ result.val[1] = vmaxq_s32(vminq_s32(result.val[1], max_val_dup), min_val_dup);
+ result.val[2] = vmaxq_s32(vminq_s32(result.val[2], max_val_dup), min_val_dup);
+ result.val[3] = vmaxq_s32(vminq_s32(result.val[3], max_val_dup), min_val_dup);
+
+ const uint32x4_t result_val_1_unsigned = vreinterpretq_u32_s32(result.val[0]);
+ const uint32x4_t result_val_2_unsigned = vreinterpretq_u32_s32(result.val[1]);
+ const uint32x4_t result_val_3_unsigned = vreinterpretq_u32_s32(result.val[2]);
+ const uint32x4_t result_val_4_unsigned = vreinterpretq_u32_s32(result.val[3]);
+
+ const uint16x4_t narrowed_val_1 = vqmovn_u32(result_val_1_unsigned);
+ const uint16x4_t narrowed_val_2 = vqmovn_u32(result_val_2_unsigned);
+ const uint16x4_t narrowed_val_3 = vqmovn_u32(result_val_3_unsigned);
+ const uint16x4_t narrowed_val_4 = vqmovn_u32(result_val_4_unsigned);
+ const uint16x8_t output_first_half = vcombine_u16(narrowed_val_1, narrowed_val_2);
+ const uint16x8_t output_second_half = vcombine_u16(narrowed_val_3, narrowed_val_4);
+ const uint8x8_t narrowed_first_half = vqmovn_u16(output_first_half);
+ const uint8x8_t narrowed_second_half = vqmovn_u16(output_second_half);
+ const uint8x16_t narrowed_result = vcombine_u8(narrowed_first_half, narrowed_second_half);
+ vst1q_u8(output_data + i, narrowed_result);
+ }
+
+#endif
+ for (; i < size; ++i)
+ {
+ const int32_t input = input_data[i] - input_zeropoint;
+ const int32_t output =
+ MultiplyByQuantizedMultiplier(input, effective_scale_multiplier, effective_scale_shift) +
+ output_zeropoint;
+ const int32_t clamped_output = std::max(std::min(output, kMaxOutput), kMinOutput);
+ output_data[i] = static_cast<uint8_t>(clamped_output);
+ }
+}
+
} // namespace cker
} // namespace nnfw
input_size *= input_dims[idx];
}
reduce_size = input_dims[input_num_dims - 1];
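+  // offset is the flat index of the first element of the current reduction row
+  // (i.e. idx * reduce_size); the vector loads below are all relative to it.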
+ int offset = 0;
for (int idx = 0; idx < input_size; idx++)
{
int r_idx = 0;
float32x4_t tmp_data_32x4 = vld1q_f32(tmp_data);
for (; r_idx <= reduce_size - 32; r_idx += 32)
{
- float32x4_t a10 = vld1q_f32(input_data + r_idx);
- float32x4_t a11 = vld1q_f32(input_data + r_idx + 4);
- float32x4_t a12 = vld1q_f32(input_data + r_idx + 8);
- float32x4_t a13 = vld1q_f32(input_data + r_idx + 12);
- float32x4_t a20 = vld1q_f32(input_data + r_idx + 16);
- float32x4_t a21 = vld1q_f32(input_data + r_idx + 20);
- float32x4_t a22 = vld1q_f32(input_data + r_idx + 24);
- float32x4_t a23 = vld1q_f32(input_data + r_idx + 28);
+ float32x4_t a10 = vld1q_f32(input_data + offset + r_idx);
+ float32x4_t a11 = vld1q_f32(input_data + offset + r_idx + 4);
+ float32x4_t a12 = vld1q_f32(input_data + offset + r_idx + 8);
+ float32x4_t a13 = vld1q_f32(input_data + offset + r_idx + 12);
+ float32x4_t a20 = vld1q_f32(input_data + offset + r_idx + 16);
+ float32x4_t a21 = vld1q_f32(input_data + offset + r_idx + 20);
+ float32x4_t a22 = vld1q_f32(input_data + offset + r_idx + 24);
+ float32x4_t a23 = vld1q_f32(input_data + offset + r_idx + 28);
float32x4_t x0 = vaddq_f32(a10, a20);
float32x4_t x1 = vaddq_f32(a11, a21);
float32x4_t y2 = vaddq_f32(y0, y1);
tmp_data_32x4 = vaddq_f32(tmp_data_32x4, y2);
}
+ for (; r_idx <= reduce_size - 16; r_idx += 16)
+ {
+ float32x4_t a10 = vld1q_f32(input_data + offset + r_idx);
+ float32x4_t a11 = vld1q_f32(input_data + offset + r_idx + 4);
+ float32x4_t a12 = vld1q_f32(input_data + offset + r_idx + 8);
+ float32x4_t a13 = vld1q_f32(input_data + offset + r_idx + 12);
+
+ float32x4_t x0 = vaddq_f32(a10, a11);
+ float32x4_t x1 = vaddq_f32(a12, a13);
+
+ float32x4_t y0 = vaddq_f32(x0, x1);
+ tmp_data_32x4 = vaddq_f32(tmp_data_32x4, y0);
+ }
for (; r_idx <= reduce_size - 8; r_idx += 8)
{
- float32x4_t a1 = vld1q_f32(input_data + r_idx);
- float32x4_t a2 = vld1q_f32(input_data + r_idx + 4);
+ float32x4_t a1 = vld1q_f32(input_data + offset + r_idx);
+ float32x4_t a2 = vld1q_f32(input_data + offset + r_idx + 4);
float32x4_t x = vaddq_f32(a1, a2);
tmp_data_32x4 = vaddq_f32(tmp_data_32x4, x);
}
{
if (r_idx == 0)
{
- output_data[idx] = input_data[idx * reduce_size];
+ output_data[idx] = input_data[offset];
}
else
{
- output_data[idx] += input_data[idx * reduce_size + r_idx];
+ output_data[idx] += input_data[offset + r_idx];
}
}
+ offset += reduce_size;
}
}
#endif // NEON
batches, input_height, input_width, depth, params.output_height, params.output_width,
height_scale, width_scale, input_shape, input_data, output_data, params.half_pixel_centers);
}
+
+inline void ComputeInterpolationValues(const int32_t value, const int32_t scale_10,
+ const bool half_pixel_centers, int32_t input_size,
+ int32_t *scaled_value, int32_t *lower_bound,
+ int32_t *upper_bound)
+{
+ if (half_pixel_centers)
+ {
+ *scaled_value = value * scale_10 + scale_10 / 2 - (1 << 9);
+ }
+ else
+ {
+ *scaled_value = value * scale_10;
+ }
+ *lower_bound = std::max(*scaled_value / (1 << 10), 0);
+ *upper_bound = std::min(*scaled_value / (1 << 10) + 1, input_size - 1);
+}
+
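+// Quantized (int8) bilinear resize. Scales are kept with 10 fractional bits
+// (the "*_10" values hold input_size / output_size * 1024, rounded), the
+// interpolation weights are accumulated with 20 fractional bits, and the final
+// value is rounded by adding +/- (1 << 19) before dividing by (1 << 20).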
+inline void ResizeBilinear(const ResizeBilinearParams &op_params,
+ const Shape &unextended_input_shape, const int8_t *input_data,
+ const Shape &unextended_output_shape, int8_t *output_data)
+{
+ // If half_pixel_centers is True, align_corners must be False.
+ assert(!op_params.half_pixel_centers || !op_params.align_corners);
+ assert(unextended_input_shape.DimensionsCount() <= 4);
+ assert(unextended_output_shape.DimensionsCount() <= 4);
+ const Shape input_shape = Shape::ExtendedShape(4, unextended_input_shape);
+ const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+
+ const int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_height = input_shape.Dims(1);
+ const int32_t input_width = input_shape.Dims(2);
+ const int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
+
+ const int32_t output_height = op_params.output_height;
+ const int32_t output_width = op_params.output_width;
+
+ int32_t height_scale_10 = ((1 << 10) * input_height + output_height / 2) / output_height;
+ int32_t width_scale_10 = ((1 << 10) * input_width + output_width / 2) / output_width;
+ if (op_params.align_corners && output_height > 1)
+ {
+ height_scale_10 =
+ ((1 << 10) * (input_height - 1) + (output_height - 1) / 2) / (output_height - 1);
+ }
+ if (op_params.align_corners && output_width > 1)
+ {
+ width_scale_10 = ((1 << 10) * (input_width - 1) + (output_width - 1) / 2) / (output_width - 1);
+ }
+
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int y = 0; y < output_height; ++y)
+ {
+ int32_t input_y, y0, y1;
+ ComputeInterpolationValues(y, height_scale_10, op_params.half_pixel_centers, input_height,
+ &input_y, &y0, &y1);
+ for (int x = 0; x < output_width; ++x)
+ {
+ int32_t input_x, x0, x1;
+ ComputeInterpolationValues(x, width_scale_10, op_params.half_pixel_centers, input_width,
+ &input_x, &x0, &x1);
+ for (int c = 0; c < depth; ++c)
+ {
+ const int64_t output_20_ll =
+ static_cast<int64_t>(input_data[Offset(input_shape, b, y0, x0, c)]) *
+ ((1 << 10) - (input_y - (1 << 10) * y0)) * ((1 << 10) - (input_x - (1 << 10) * x0));
+ const int64_t output_20_lu =
+ static_cast<int64_t>(input_data[Offset(input_shape, b, y1, x0, c)]) *
+ (input_y - (1 << 10) * y0) * ((1 << 10) - (input_x - (1 << 10) * x0));
+ const int64_t output_20_rl =
+ static_cast<int64_t>(input_data[Offset(input_shape, b, y0, x1, c)]) *
+ ((1 << 10) - (input_y - (1 << 10) * y0)) * (input_x - (1 << 10) * x0);
+ const int64_t output_20_ru =
+ static_cast<int64_t>(input_data[Offset(input_shape, b, y1, x1, c)]) *
+ (input_y - (1 << 10) * y0) * (input_x - (1 << 10) * x0);
+ const int64_t output_20 = output_20_ll + output_20_lu + output_20_rl + output_20_ru;
+ const int64_t round = (output_20 > 0) ? (1 << 19) : -(1 << 19);
+ const int8_t interpolation = static_cast<int8_t>((output_20 + round) / (1 << 20));
+ output_data[Offset(output_shape, b, y, x, c)] = interpolation;
+ }
+ }
+ }
+ }
+}
+
} // namespace cker
} // namespace nnfw
#include "cker/Types.h"
#include "cker/eigen/Utils.h"
+#if __aarch64__ && __clang__
+#define TFLITE_SOFTMAX_USE_UINT16_LUT
+#endif
+
#include <Eigen/Core>
#include <fixedpoint/fixedpoint.h>
#include <cmath>
}
}
}
-}
+} // namespace reference
// Performs softmax along the input of size (input_size * batch_size).
inline void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
out_mat.array().rowwise() *= scale;
}
-inline void Softmax(const SoftmaxParams ¶ms, const Shape &input_shape,
- const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
-{
- const int32_t input_beta_multiplier = params.input_multiplier;
- const int32_t input_beta_left_shift = params.input_left_shift;
- const int diff_min = params.diff_min;
- // The representation chosen for the input to the exp() function is Q5.26.
- // We need to leave extra space since values that we skip might be as large as
- // -32 before multiplying by input_beta_multiplier, and therefore as large as
- // -16 afterwards. Note that exp(-8) is definitely not insignificant to
- // accumulation, but exp(-16) definitely is.
- static const int kScaledDiffIntegerBits = 5;
- static const int kAccumulationIntegerBits = 12;
- using FixedPointScaledDiff = gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
- using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
- using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
+template <typename T> inline int32_t QuantizeSoftmaxOutput(float prob_rescaled, int32_t zero_point)
+{
+ const int32_t prob_rnd = static_cast<int32_t>(std::round(prob_rescaled));
+ return prob_rnd + zero_point;
+}
+
+#if !__aarch64__
+// On ARM64, std::round is faster than add + truncation, so this
+// specialization is only used on other targets.
+template <> inline int32_t QuantizeSoftmaxOutput<uint8_t>(float prob_rescaled, int32_t)
+{
+ return static_cast<int32_t>(prob_rescaled + 0.5f);
+}
+#endif
+
+inline void PopulateSoftmaxLookupTable(float *table, float input_scale, float beta)
+{
+ const float scale = -input_scale * beta;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ for (int32_t val = 0; val <= max_uint8; ++val)
+ {
+ table[max_uint8 - val] = expf(scale * val);
+ }
+}
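+// The table above stores exp(-input_scale * beta * (255 - idx)). In the
+// Softmax below, table_offset points at &table[255 - max_val], so
+// table_offset[x] yields exp(input_scale * beta * (x - max_val)) for a raw
+// quantized input x, i.e. the shifted exponent of a numerically stable softmax.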
+template <typename In, typename Out>
+inline void Softmax(const SoftmaxParams ¶ms, const Shape &input_shape, const In *input_data,
+ const Shape &output_shape, Out *output_data)
+{
const int trailing_dim = input_shape.DimensionsCount() - 1;
- const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
- const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+ const int excluding_last_dim = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int last_dim = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
- for (int i = 0; i < outer_size; ++i)
+ const int32_t clamp_max = std::numeric_limits<Out>::max();
+ const int32_t clamp_min = std::numeric_limits<Out>::min();
+ for (int i = 0; i < excluding_last_dim; ++i)
{
- uint8_t max_in_row = 0;
- for (int c = 0; c < depth; ++c)
+ int32_t max_val = std::numeric_limits<In>::min();
+ // Find max quantized value.
+ for (int j = 0; j < last_dim; ++j)
{
- max_in_row = std::max(max_in_row, input_data[i * depth + c]);
+ max_val = std::max(max_val, static_cast<int32_t>(input_data[j]));
}
- FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
- for (int c = 0; c < depth; ++c)
+ float sum_exp = 0.0f;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ const float *table_offset = ¶ms.table[max_uint8 - max_val];
+ // Calculate normalizer sum(exp(x)).
+ for (int j = 0; j < last_dim; ++j)
{
- int32_t input_diff = static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
- if (input_diff >= diff_min)
- {
- const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne(
- input_diff, input_beta_multiplier, input_beta_left_shift);
- const FixedPointScaledDiff scaled_diff_f8 =
- FixedPointScaledDiff::FromRaw(input_diff_rescaled);
- sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
- exp_on_negative_values(scaled_diff_f8));
- }
+ sum_exp += table_offset[input_data[j]];
}
- int32_t fixed_sum_of_exps = sum_of_exps.raw();
- int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(fixed_sum_of_exps));
- // This is the number of bits to the left of the binary point above 1.0.
- // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and
- // no later adjustment will be needed.
- int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one;
- int32_t shifted_sum_minus_one =
- static_cast<int32_t>((static_cast<uint32_t>(fixed_sum_of_exps) << headroom_plus_one) -
- (static_cast<uint32_t>(1) << 31));
+ const float inv_sum_exp = 1.0f / (sum_exp * params.scale);
+ // Normalize and quantize probabilities.
+ for (int j = 0; j < last_dim; ++j)
+ {
+ const float prob_rescaled = table_offset[input_data[j]] * inv_sum_exp;
+ const int32_t prob_quantized = QuantizeSoftmaxOutput<Out>(prob_rescaled, params.zero_point);
+ output_data[j] = static_cast<Out>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
+ }
+ input_data += last_dim;
+ output_data += last_dim;
+ }
+}
+
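+// A minimal usage sketch for the LUT-based Softmax above (assuming a
+// SoftmaxParams `params`, a caller-owned 256-entry float buffer and uint8
+// tensors; names other than the functions above are illustrative):
+//   params.table = table_storage;                 // 256 floats
+//   PopulateSoftmaxLookupTable(params.table, input_scale, beta);
+//   params.zero_point = output_zero_point;
+//   params.scale = output_scale;
+//   Softmax(params, in_shape, in_data, out_shape, out_data);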
+#ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
+// Looks up each element of <indices> in <table>, returns them in a vector.
+inline uint8x16_t aarch64_lookup_vector(const uint8x16x4_t table[4], uint8x16_t indices)
+{
+ // Look up in 1st quarter of the table: top 2 bits of indices == 00
+ uint8x16_t output1 = vqtbl4q_u8(table[0], indices);
+ // Look up in 2nd quarter of the table: top 2 bits of indices == 01
+ uint8x16_t output2 = vqtbl4q_u8(table[1], veorq_u8(indices, vdupq_n_u8(0x40)));
+ // Look up in 3rd quarter of the table: top 2 bits of indices == 10
+ uint8x16_t output3 = vqtbl4q_u8(table[2], veorq_u8(indices, vdupq_n_u8(0x80)));
+ // Look up in 4th quarter of the table: top 2 bits of indices == 11
+ uint8x16_t output4 = vqtbl4q_u8(table[3], veorq_u8(indices, vdupq_n_u8(0xc0)));
+
+ // Combine result of the 4 lookups.
+ return vorrq_u8(vorrq_u8(output1, output2), vorrq_u8(output3, output4));
+}
- FixedPoint0 shifted_scale =
- one_over_one_plus_x_for_x_in_0_1(FixedPoint0::FromRaw(shifted_sum_minus_one));
+inline void PopulateSoftmaxUInt8LookupTable(uint8_t *uint8_table1, uint8_t *uint8_table2,
+ float input_scale, float beta)
+{
+ const float scale = input_scale * beta;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ const int32_t max_uint16 = std::numeric_limits<uint16_t>::max();
- for (int c = 0; c < depth; ++c)
+ for (int32_t val = 0; val <= max_uint8; ++val)
+ {
+ float input_to_exp = scale * (val - max_uint8);
+ int32_t temp = static_cast<int>(expf(input_to_exp) * max_uint16 + 0.5);
+ temp = std::min(max_uint16, temp);
+ uint8_t part1 = temp >> 8;
+ uint8_t part2 = temp & 0xff;
+ uint8_table1[val] = static_cast<uint8_t>(part1);
+ uint8_table2[val] = static_cast<uint8_t>(part2);
+ }
+}
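+// The two tables above hold the high and low bytes of
+// exp(input_scale * beta * (val - 255)) quantized to uint16; SoftmaxInt8LUT
+// below recombines them as (uint8_table1[i] << 8) + uint8_table2[i], both in
+// the vector path (via a widening shift by 8) and in the scalar tail.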
+
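+// FindMaxValue treats int8 inputs as uint8 after XOR-ing with 0x80 (passed in
+// as `offset`), which maps [-128, 127] onto [0, 255] while preserving order,
+// so a single unsigned max reduction works for both input types.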
+inline int FindMaxValue(int size, const uint8_t *input_data, uint8_t offset)
+{
+ int32_t max_val = std::numeric_limits<uint8_t>::min();
+ int j = 0;
+
+ uint8x16_t max_val_dup = vdupq_n_u8(max_val);
+ uint8x16_t offset_dup = vdupq_n_u8(offset);
+ for (; j <= size - 16; j += 16)
+ {
+ uint8x16_t input_value = vld1q_u8(input_data + j);
+ input_value = veorq_u8(input_value, offset_dup);
+ max_val_dup = vmaxq_u8(input_value, max_val_dup);
+ }
+ max_val = std::max(max_val, static_cast<int32_t>(vmaxvq_u8(max_val_dup)));
+
+ for (; j < size; ++j)
+ {
+ max_val = std::max(max_val, static_cast<int32_t>(input_data[j] ^ offset));
+ }
+ return max_val;
+}
+
+#ifdef USE_NEON
+// Value_to_store layout:
+// [high_high, high_low, low_high, low_low].
+inline void StoreValue(int32x4x4_t value_to_store, int8_t *output)
+{
+ const int16x8_t result_1 =
+ vcombine_s16(vqmovn_s32(value_to_store.val[1]), vqmovn_s32(value_to_store.val[0]));
+ const int16x8_t result_2 =
+ vcombine_s16(vqmovn_s32(value_to_store.val[3]), vqmovn_s32(value_to_store.val[2]));
+ const int8x16_t result = vcombine_s8(vqmovn_s16(result_2), vqmovn_s16(result_1));
+ vst1q_s8(output, result);
+}
+
+// Value_to_store layout:
+// [high_high, high_low, low_high, low_low].
+inline void StoreValue(int32x4x4_t value_to_store, uint8_t *output)
+{
+ const uint16x8_t result_1 =
+ vcombine_u16(vqmovn_u32(vreinterpretq_u32_s32(value_to_store.val[1])),
+ vqmovn_u32(vreinterpretq_u32_s32(value_to_store.val[0])));
+ const uint16x8_t result_2 =
+ vcombine_u16(vqmovn_u32(vreinterpretq_u32_s32(value_to_store.val[3])),
+ vqmovn_u32(vreinterpretq_u32_s32(value_to_store.val[2])));
+ const uint8x16_t result = vcombine_u8(vqmovn_u16(result_2), vqmovn_u16(result_1));
+ vst1q_u8(output, result);
+}
+
+#endif
+
+template <typename In, typename Out>
+inline void SoftmaxInt8LUT(const SoftmaxParams ¶ms, const Shape &input_shape,
+ const In *input_data, const Shape &output_shape, Out *output_data)
+{
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int excluding_last_dim = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int last_dim = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+ const int32_t clamp_max = std::numeric_limits<Out>::max();
+ const int32_t clamp_min = std::numeric_limits<Out>::min();
+
+  // The offset is used to interpret the input data "correctly".
+  // If the input is uint8, the data is used unchanged.
+  // If the input is int8, it is reinterpreted as uint8 and the offset flips the
+  // sign bit, e.g. int8 127 becomes 255 in uint8 once the offset is applied.
+ uint8_t offset = 0;
+ if (std::is_same<In, int8_t>::value)
+ {
+ offset = 0x80;
+ }
+
+ const uint8_t *input_data_uint = reinterpret_cast<const uint8_t *>(input_data);
+
+ // This code uses ARM64-only instructions.
+ // TODO(b/143709993): Port to ARMv7
+
+ // Load the tables into registers. (4*4 128-bit registers)
+ uint8x16x4_t table1[4];
+ table1[0] = vld1q_u8_x4(params.uint8_table1 + 16 * 4 * 0);
+ table1[1] = vld1q_u8_x4(params.uint8_table1 + 16 * 4 * 1);
+ table1[2] = vld1q_u8_x4(params.uint8_table1 + 16 * 4 * 2);
+ table1[3] = vld1q_u8_x4(params.uint8_table1 + 16 * 4 * 3);
+
+ uint8x16x4_t table2[4];
+ table2[0] = vld1q_u8_x4(params.uint8_table2 + 16 * 4 * 0);
+ table2[1] = vld1q_u8_x4(params.uint8_table2 + 16 * 4 * 1);
+ table2[2] = vld1q_u8_x4(params.uint8_table2 + 16 * 4 * 2);
+ table2[3] = vld1q_u8_x4(params.uint8_table2 + 16 * 4 * 3);
+
+ for (int i = 0; i < excluding_last_dim; ++i)
+ {
+ // Find max quantized value.
+ int32_t max_val = FindMaxValue(last_dim, input_data_uint, offset);
+
+ int32_t sum_exp = 0;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ const uint8_t table_offset = max_uint8 - max_val;
+
+ // Calculate normalizer sum(exp(x)).
+ int sum_j = 0;
+ uint8x16_t table_offset_dup = vdupq_n_u8(table_offset);
+ uint8x16_t offset_dup = vdupq_n_u8(offset);
+ uint32x4_t sum_4 = vdupq_n_u32(0);
+ const int multiplier_shift = 8;
+ for (; sum_j <= last_dim - 16; sum_j += 16)
{
- int32_t input_diff = static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
- if (input_diff >= diff_min)
- {
- const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne(
- input_diff, input_beta_multiplier, input_beta_left_shift);
- const FixedPointScaledDiff scaled_diff_f8 =
- FixedPointScaledDiff::FromRaw(input_diff_rescaled);
-
- FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
- int32_t unsat_output = gemmlowp::RoundingDivideByPOT((shifted_scale * exp_in_0).raw(),
- num_bits_over_unit + 31 - 8);
-
- output_data[i * depth + c] = static_cast<uint8_t>(
- std::max(std::min(unsat_output, static_cast<int32_t>(255)), static_cast<int32_t>(0)));
- }
- else
- {
- output_data[i * depth + c] = 0;
- }
+ uint8x16_t input_value = vld1q_u8(input_data_uint + sum_j);
+ input_value = veorq_u8(input_value, offset_dup);
+ input_value = vaddq_u8(input_value, table_offset_dup);
+
+ const uint8x16_t output1 = aarch64_lookup_vector(table1, input_value);
+ const uint8x16_t output2 = aarch64_lookup_vector(table2, input_value);
+
+ uint16x8_t exp_value1 = vshll_n_u8(vget_high_u8(output1), multiplier_shift);
+ uint16x8_t exp_value2 = vshll_n_u8(vget_low_u8(output1), multiplier_shift);
+
+ exp_value1 = vaddw_u8(exp_value1, vget_high_u8(output2));
+ exp_value2 = vaddw_u8(exp_value2, vget_low_u8(output2));
+
+ sum_4 = vpadalq_u16(sum_4, exp_value1);
+ sum_4 = vpadalq_u16(sum_4, exp_value2);
+ }
+ int temp = vgetq_lane_u32(sum_4, 0) + vgetq_lane_u32(sum_4, 1) + vgetq_lane_u32(sum_4, 2) +
+ vgetq_lane_u32(sum_4, 3);
+ sum_exp += temp;
+
+ for (; sum_j < last_dim; ++sum_j)
+ {
+ const uint8_t index = (input_data_uint[sum_j] ^ offset) + table_offset;
+
+ uint8_t part1 = params.uint8_table1[index];
+ uint8_t part2 = params.uint8_table2[index];
+ sum_exp += ((part1 << 8) + part2);
+ }
+
+ const float inv_sum_exp = 1.0f / (sum_exp * params.scale);
+
+ int32_t multiplier, shift;
+ QuantizeMultiplier(inv_sum_exp, &multiplier, &shift);
+
+ // Normalize and quantize probabilities.
+ int j = 0;
+ const int32x4_t output_zp_dup = vdupq_n_s32(params.zero_point);
+ const int32x4_t max_val_dup = vdupq_n_s32(clamp_max);
+ const int32x4_t min_val_dup = vdupq_n_s32(clamp_min);
+
+ for (; j <= last_dim - 16; j += 16)
+ {
+ uint8x16_t input_value = vld1q_u8(input_data_uint + j);
+ input_value = veorq_u8(input_value, offset_dup);
+ input_value = vaddq_u8(input_value, table_offset_dup);
+
+ const uint8x16_t output1 = aarch64_lookup_vector(table1, input_value);
+ const uint8x16_t output2 = aarch64_lookup_vector(table2, input_value);
+
+ uint16x8_t exp_value1 = vshll_n_u8(vget_high_u8(output1), multiplier_shift);
+ uint16x8_t exp_value2 = vshll_n_u8(vget_low_u8(output1), multiplier_shift);
+
+ exp_value1 = vaddw_u8(exp_value1, vget_high_u8(output2));
+ exp_value2 = vaddw_u8(exp_value2, vget_low_u8(output2));
+
+ int32x4x4_t output_value;
+ output_value.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(exp_value1)));
+ output_value.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(exp_value1)));
+ output_value.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(exp_value2)));
+ output_value.val[3] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(exp_value2)));
+
+ int32x4x4_t temp_val = MultiplyByQuantizedMultiplier4Rows(output_value, multiplier, shift);
+
+ temp_val.val[0] = vaddq_s32(temp_val.val[0], output_zp_dup);
+ temp_val.val[1] = vaddq_s32(temp_val.val[1], output_zp_dup);
+ temp_val.val[2] = vaddq_s32(temp_val.val[2], output_zp_dup);
+ temp_val.val[3] = vaddq_s32(temp_val.val[3], output_zp_dup);
+
+ temp_val.val[0] = vmaxq_s32(vminq_s32(temp_val.val[0], max_val_dup), min_val_dup);
+ temp_val.val[1] = vmaxq_s32(vminq_s32(temp_val.val[1], max_val_dup), min_val_dup);
+ temp_val.val[2] = vmaxq_s32(vminq_s32(temp_val.val[2], max_val_dup), min_val_dup);
+ temp_val.val[3] = vmaxq_s32(vminq_s32(temp_val.val[3], max_val_dup), min_val_dup);
+
+ StoreValue(temp_val, output_data + j);
+ }
+ for (; j < last_dim; ++j)
+ {
+ const uint8_t index = (input_data_uint[j] ^ offset) + table_offset;
+ const uint8_t part1 = params.uint8_table1[index];
+ const uint8_t part2 = params.uint8_table2[index];
+ const int32_t exp_value = (part1 << 8) + part2;
+ const int32_t output_value = MultiplyByQuantizedMultiplier(exp_value, multiplier, shift);
+
+ output_data[j] = static_cast<Out>(
+ std::max(std::min(clamp_max, output_value + params.zero_point), clamp_min));
}
+ input_data_uint += last_dim;
+ output_data += last_dim;
}
}
+#endif
} // namespace cker
} // namespace nnfw
Distribution());
}
-inline void StatelessRandomUniform(const Shape &shape_shape, const int *shape_data,
- const Shape &seed_shape, const int *seed_data,
+inline void StatelessRandomUniform(const Shape &shape_shape, const int32_t *shape_data,
+ const Shape &seed_shape, const int32_t *seed_data,
const Shape &output_shape, float *output_data)
{
Tensor shape_t;
return flat_size;
}
-} // namespace anonymous (util)
+} // namespace
// Transpose2D only deals with typical 2D matrix transpose ops.
// Perform transpose by transposing 4x4 blocks of the input, proceeding from
namespace optimized
{
+// Old version (takes an explicit switch_inputs flag): used for Sub(float) and Div.
template <typename ElementwiseF, typename ScalarBroadcastF, typename T>
inline void BinaryBroadcastFiveFold(const BinaryArithmeticOpParam ¶ms, bool switch_inputs,
const Shape & /* unswitched_input1_shape */,
}
}
-inline int32_t quant8_sum(const BinaryArithmeticOpParam ¶ms, const uint8_t input1_data,
- const uint8_t input2_data)
+// New version: used for Mul, Add and Sub(quant8). Input switching is derived
+// from params.broadcast_category rather than an explicit switch_inputs flag.
+template <typename ElementwiseF, typename ScalarBroadcastF, typename T>
+inline void BinaryBroadcastFiveFold(const BinaryArithmeticOpParam &unswitched_params,
+ const Shape & /* unswitched_input1_shape */,
+ const T *unswitched_input1_data,
+ const Shape & /* unswitched_input2_shape */,
+ const T *unswitched_input2_data,
+ const Shape & /* output_shape */, T *output_data,
+ ElementwiseF elementwise_f, ScalarBroadcastF scalar_broadcast_f)
+{
+ BinaryArithmeticOpParam switched_params = unswitched_params;
+ switched_params.input1_offset = unswitched_params.input2_offset;
+ switched_params.input1_multiplier = unswitched_params.input2_multiplier;
+ switched_params.input1_shift = unswitched_params.input2_shift;
+ switched_params.input2_offset = unswitched_params.input1_offset;
+ switched_params.input2_multiplier = unswitched_params.input1_multiplier;
+ switched_params.input2_shift = unswitched_params.input1_shift;
+
+ const bool use_unswitched =
+ unswitched_params.broadcast_category == BroadcastableOpCategory::kFirstInputBroadcastsFast;
+
+ const BinaryArithmeticOpParam ¶ms = use_unswitched ? unswitched_params : switched_params;
+ const T *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
+ const T *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+
+ // Fivefold nested loops. The second input resets its position for each
+ // iteration of the second loop. The first input resets its position at the
+ // beginning of the fourth loop. The innermost loop is an elementwise add of
+ // sections of the arrays.
+ T *output_data_ptr = output_data;
+ const T *input1_data_ptr = input1_data;
+ const T *input2_data_reset = input2_data;
+ // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
+ // between input shapes. y3 for input 1 is always broadcast, and so the
+ // dimension there is 1, whereas optionally y1 might be broadcast for
+ // input 2. Put another way, input1.shape.FlatSize = y0 * y1 * y2 * y4,
+ // input2.shape.FlatSize = y0 * y2 * y3 * y4.
+ int y0 = params.broadcast_shape[0];
+ int y1 = params.broadcast_shape[1];
+ int y2 = params.broadcast_shape[2];
+ int y3 = params.broadcast_shape[3];
+ int y4 = params.broadcast_shape[4];
+ if (y4 > 1)
+ {
+ // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
+ // dimension.
+ for (int i0 = 0; i0 < y0; ++i0)
+ {
+ const T *input2_data_ptr = nullptr;
+ for (int i1 = 0; i1 < y1; ++i1)
+ {
+ input2_data_ptr = input2_data_reset;
+ for (int i2 = 0; i2 < y2; ++i2)
+ {
+ for (int i3 = 0; i3 < y3; ++i3)
+ {
+ elementwise_f(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
+ input2_data_ptr += y4;
+ output_data_ptr += y4;
+ }
+ // We have broadcast y4 of input1 data y3 times, and now move on.
+ input1_data_ptr += y4;
+ }
+ }
+ // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
+ input2_data_reset = input2_data_ptr;
+ }
+ }
+ else
+ {
+ // Special case of y4 == 1, in which the innermost loop is a single
+ // element and can be combined with the next (y3) as an inner broadcast.
+ //
+ // Note that this handles the case of pure scalar broadcast when
+ // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
+ // broadcast with batch (as y2 > 1).
+ //
+ // NOTE The process is the same as the above general case except
+ // simplified for y4 == 1 and the loop over y3 is contained within the
+ // AddScalarBroadcast function.
+ for (int i0 = 0; i0 < y0; ++i0)
+ {
+ const T *input2_data_ptr = nullptr;
+ for (int i1 = 0; i1 < y1; ++i1)
+ {
+ input2_data_ptr = input2_data_reset;
+ for (int i2 = 0; i2 < y2; ++i2)
+ {
+ scalar_broadcast_f(y3, params, *input1_data_ptr, input2_data_ptr, output_data_ptr);
+ input2_data_ptr += y3;
+ output_data_ptr += y3;
+ input1_data_ptr += 1;
+ }
+ }
+ input2_data_reset = input2_data_ptr;
+ }
+ }
+}
+
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value, int32_t>
+quant8_sum(const BinaryArithmeticOpParam ¶ms, const T input1_data, const T input2_data)
{
const int32_t input1_val = params.input1_offset + input1_data;
const int32_t input2_val = params.input2_offset + input2_data;
return clamped_output;
}
-inline void AddElementwiseQuant8(int size, const BinaryArithmeticOpParam ¶ms,
- const uint8_t *input1_data, const uint8_t *input2_data,
- uint8_t *output_data)
+inline void AddElementwise(int size, const BinaryArithmeticOpParam ¶ms,
+ const uint8_t *input1_data, const uint8_t *input2_data,
+ uint8_t *output_data)
{
int i = 0;
}
}
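+// int8 variant of AddElementwise. The NEON path widens each 16-lane vector to
+// four int32x4 registers, shifts by (left_shift + inputX_shift), applies the
+// per-input fixed-point multipliers with vqrdmulhq, rescales the sum with the
+// output multiplier/shift, adds the output offset and clamps before narrowing
+// back to int8. The scalar tail mirrors the uint8 version above.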
+inline void AddElementwise(int size, const BinaryArithmeticOpParam ¶ms,
+ const int8_t *input1_data, const int8_t *input2_data,
+ int8_t *output_data)
+{
+ int i = 0;
+#ifdef USE_NEON
+ const int8x16_t output_activation_min_vector = vdupq_n_s8(params.quantized_activation_min);
+ const int8x16_t output_activation_max_vector = vdupq_n_s8(params.quantized_activation_max);
+
+ const int input1_left_shift = params.left_shift + params.input1_shift;
+ const int input2_left_shift = params.left_shift + params.input2_shift;
+ const int32x4_t input1_left_dup = vdupq_n_s32(input1_left_shift);
+ const int32x4_t input2_left_dup = vdupq_n_s32(input2_left_shift);
+
+ const int16x8_t input1_offset_dup = vdupq_n_s16(params.input1_offset);
+ const int16x8_t input2_offset_dup = vdupq_n_s16(params.input2_offset);
+
+ for (; i <= size - 16; i += 16)
+ {
+ const int8x16_t input1_val_original = vld1q_s8(input1_data + i);
+ const int8x16_t input2_val_original = vld1q_s8(input2_data + i);
+
+ const int16x8_t input1_val_s16_high = vmovl_s8(vget_high_s8(input1_val_original));
+ const int16x8_t input1_val_s16_low = vmovl_s8(vget_low_s8(input1_val_original));
+
+ const int16x8_t input2_val_s16_high = vmovl_s8(vget_high_s8(input2_val_original));
+ const int16x8_t input2_val_s16_low = vmovl_s8(vget_low_s8(input2_val_original));
+ const int16x8_t input1_val_high = vaddq_s16(input1_val_s16_high, input1_offset_dup);
+ const int16x8_t input2_val_high = vaddq_s16(input2_val_s16_high, input2_offset_dup);
+ const int16x8_t input1_val_low = vaddq_s16(input1_val_s16_low, input1_offset_dup);
+ const int16x8_t input2_val_low = vaddq_s16(input2_val_s16_low, input2_offset_dup);
+ const int16x4_t input1_val_high_high = vget_high_s16(input1_val_high);
+ const int16x4_t input1_val_high_low = vget_low_s16(input1_val_high);
+ const int16x4_t input1_val_low_high = vget_high_s16(input1_val_low);
+ const int16x4_t input1_val_low_low = vget_low_s16(input1_val_low);
+ const int16x4_t input2_val_high_high = vget_high_s16(input2_val_high);
+ const int16x4_t input2_val_high_low = vget_low_s16(input2_val_high);
+ const int16x4_t input2_val_low_high = vget_high_s16(input2_val_low);
+ const int16x4_t input2_val_low_low = vget_low_s16(input2_val_low);
+ int32x4_t x111 = vmovl_s16(input1_val_low_low);
+ int32x4_t x112 = vmovl_s16(input1_val_low_high);
+ int32x4_t x121 = vmovl_s16(input1_val_high_low);
+ int32x4_t x122 = vmovl_s16(input1_val_high_high);
+ int32x4_t x211 = vmovl_s16(input2_val_low_low);
+ int32x4_t x212 = vmovl_s16(input2_val_low_high);
+ int32x4_t x221 = vmovl_s16(input2_val_high_low);
+ int32x4_t x222 = vmovl_s16(input2_val_high_high);
+
+ x111 = vshlq_s32(x111, input1_left_dup);
+ x112 = vshlq_s32(x112, input1_left_dup);
+ x121 = vshlq_s32(x121, input1_left_dup);
+ x122 = vshlq_s32(x122, input1_left_dup);
+ x211 = vshlq_s32(x211, input2_left_dup);
+ x212 = vshlq_s32(x212, input2_left_dup);
+ x221 = vshlq_s32(x221, input2_left_dup);
+ x222 = vshlq_s32(x222, input2_left_dup);
+ x111 = vqrdmulhq_n_s32(x111, params.input1_multiplier);
+ x112 = vqrdmulhq_n_s32(x112, params.input1_multiplier);
+ x121 = vqrdmulhq_n_s32(x121, params.input1_multiplier);
+ x122 = vqrdmulhq_n_s32(x122, params.input1_multiplier);
+ x211 = vqrdmulhq_n_s32(x211, params.input2_multiplier);
+ x212 = vqrdmulhq_n_s32(x212, params.input2_multiplier);
+ x221 = vqrdmulhq_n_s32(x221, params.input2_multiplier);
+ x222 = vqrdmulhq_n_s32(x222, params.input2_multiplier);
+ int32x4_t s11 = vaddq_s32(x111, x211);
+ int32x4_t s12 = vaddq_s32(x112, x212);
+ int32x4_t s21 = vaddq_s32(x121, x221);
+ int32x4_t s22 = vaddq_s32(x122, x222);
+ s11 = vqrdmulhq_n_s32(s11, params.output_multiplier);
+ s12 = vqrdmulhq_n_s32(s12, params.output_multiplier);
+ s21 = vqrdmulhq_n_s32(s21, params.output_multiplier);
+ s22 = vqrdmulhq_n_s32(s22, params.output_multiplier);
+ using gemmlowp::RoundingDivideByPOT;
+ s11 = RoundingDivideByPOT(s11, -params.output_shift);
+ s12 = RoundingDivideByPOT(s12, -params.output_shift);
+ s21 = RoundingDivideByPOT(s21, -params.output_shift);
+ s22 = RoundingDivideByPOT(s22, -params.output_shift);
+ const int16x4_t s11_narrowed = vmovn_s32(s11);
+ const int16x4_t s12_narrowed = vmovn_s32(s12);
+ const int16x4_t s21_narrowed = vmovn_s32(s21);
+ const int16x4_t s22_narrowed = vmovn_s32(s22);
+ const int16x8_t s1 =
+ vaddq_s16(vcombine_s16(s11_narrowed, s12_narrowed), vdupq_n_s16(params.output_offset));
+ const int16x8_t s2 =
+ vaddq_s16(vcombine_s16(s21_narrowed, s22_narrowed), vdupq_n_s16(params.output_offset));
+ const int8x16_t s = vcombine_s8(vqmovn_s16(s1), vqmovn_s16(s2));
+
+ const int8x16_t clamped =
+ vmaxq_s8(output_activation_min_vector, vminq_s8(output_activation_max_vector, s));
+ vst1q_s8(output_data + i, clamped);
+ }
+#endif // NEON
+
+ for (; i < size; ++i)
+ {
+ const int32_t input1_val = params.input1_offset + input1_data[i];
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+ const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+ const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
+ const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+ const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ raw_sum, params.output_multiplier, params.output_shift) +
+ params.output_offset;
+ const int32_t clamped_output = std::min(params.quantized_activation_max,
+ std::max(params.quantized_activation_min, raw_output));
+ output_data[i] = static_cast<int8_t>(clamped_output);
+ }
+}
+
struct BinaryOpFuncAddFloat
{
#ifdef USE_NEON
BinaryOpScalarBroadcast<FUNC, BinaryOpActivationFloatMinMax>);
}
-inline void AddQuant8(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
- const uint8_t *input1_data, const Shape &input2_shape,
- const uint8_t *input2_data, const Shape &output_shape, uint8_t *output_data)
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+Add(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data)
{
const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
- AddElementwiseQuant8(flat_size, params, input1_data, input2_data, output_data);
+ AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void Add(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
// Scalar-broadcast add that can be used for inner loop of more general
// broadcast add, so that, for example, scalar-broadcast with batch will still
// be fast.
-inline void AddScalarBroadcastQuant8(int size, const BinaryArithmeticOpParam ¶ms,
- uint8_t broadcast_value, const uint8_t *input2_data,
- uint8_t *output_data)
+inline void AddScalarBroadcast(int size, const BinaryArithmeticOpParam ¶ms,
+ uint8_t broadcast_value, const uint8_t *input2_data,
+ uint8_t *output_data)
{
int i = 0;
int32_t clamped_output;
}
}
-inline void BroadcastAddDispatchQuant8(const BinaryArithmeticOpParam ¶ms,
- const Shape &input1_shape, const uint8_t *input1_data,
- const Shape &input2_shape, const uint8_t *input2_data,
- const Shape &output_shape, uint8_t *output_data)
+// Scalar-broadcast add that can be used for inner loop of more general
+// broadcast add, so that, for example, scalar-broadcast with batch will still
+// be fast.
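+// In this int8 variant the broadcast scalar is offset, shifted and rescaled
+// once up front (x11/x12), so the inner loop only has to process input2.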
+inline void AddScalarBroadcast(int size, const BinaryArithmeticOpParam ¶ms, int8_t input1_data,
+ const int8_t *input2_data, int8_t *output_data)
{
- if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast)
+ using gemmlowp::RoundingDivideByPOT;
+ int i = 0;
+#ifdef USE_NEON
+ const int32x4_t left_shift_dup = vdupq_n_s32(params.left_shift);
+ const int8x8_t output_activation_min_vector = vdup_n_s8(params.quantized_activation_min);
+ const int8x8_t output_activation_max_vector = vdup_n_s8(params.quantized_activation_max);
+
+ // Process broadcast scalar.
+ const int8x8_t input1_val_original = vdup_n_s8(input1_data);
+ const int16x8_t input1_val_s16 = vmovl_s8(input1_val_original);
+ const int16x8_t input1_val = vaddq_s16(input1_val_s16, vdupq_n_s16(params.input1_offset));
+ const int16x4_t input1_val_high = vget_high_s16(input1_val);
+ const int16x4_t input1_val_low = vget_low_s16(input1_val);
+ int32x4_t x11 = vmovl_s16(input1_val_low);
+ int32x4_t x12 = vmovl_s16(input1_val_high);
+ x11 = vshlq_s32(x11, left_shift_dup);
+ x12 = vshlq_s32(x12, left_shift_dup);
+ x11 = vqrdmulhq_n_s32(x11, params.input1_multiplier);
+ x12 = vqrdmulhq_n_s32(x12, params.input1_multiplier);
+ const int32x4_t input1_shift_dup = vdupq_n_s32(params.input1_shift);
+ x11 = vshlq_s32(x11, input1_shift_dup);
+ x12 = vshlq_s32(x12, input1_shift_dup);
+
+ for (; i <= size - 8; i += 8)
{
- const std::function<uint8_t(const BinaryArithmeticOpParam &, const uint8_t &, const uint8_t &)>
- fn =
- [](const BinaryArithmeticOpParam ¶ms, const uint8_t &a, const uint8_t &b) -> uint8_t {
- return static_cast<uint8_t>(quant8_sum(params, a, b));
- };
- reference::BroadcastBinaryArithmeticOpSlowQuant8(
- params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, fn);
+ const int8x8_t input2_val_original = vld1_s8(input2_data + i);
+ const int16x8_t input2_val_s16 = vmovl_s8(input2_val_original);
+ const int16x8_t input2_val = vaddq_s16(input2_val_s16, vdupq_n_s16(params.input2_offset));
+ const int16x4_t input2_val_high = vget_high_s16(input2_val);
+ const int16x4_t input2_val_low = vget_low_s16(input2_val);
+ int32x4_t x21 = vmovl_s16(input2_val_low);
+ int32x4_t x22 = vmovl_s16(input2_val_high);
+ x21 = vshlq_s32(x21, left_shift_dup);
+ x22 = vshlq_s32(x22, left_shift_dup);
+ x21 = vqrdmulhq_n_s32(x21, params.input2_multiplier);
+ x22 = vqrdmulhq_n_s32(x22, params.input2_multiplier);
+ const int32x4_t input2_shift_dup = vdupq_n_s32(params.input2_shift);
+ x21 = vshlq_s32(x21, input2_shift_dup);
+ x22 = vshlq_s32(x22, input2_shift_dup);
+ int32x4_t s1 = vaddq_s32(x11, x21);
+ int32x4_t s2 = vaddq_s32(x12, x22);
+ s1 = vqrdmulhq_n_s32(s1, params.output_multiplier);
+ s2 = vqrdmulhq_n_s32(s2, params.output_multiplier);
+ s1 = RoundingDivideByPOT(s1, -params.output_shift);
+ s2 = RoundingDivideByPOT(s2, -params.output_shift);
+ const int16x4_t s1_narrowed = vmovn_s32(s1);
+ const int16x4_t s2_narrowed = vmovn_s32(s2);
+ const int16x8_t s =
+ vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), vdupq_n_s16(params.output_offset));
+ const int8x8_t clamped =
+ vmax_s8(output_activation_min_vector, vmin_s8(output_activation_max_vector, vqmovn_s16(s)));
+ vst1_s8(output_data + i, clamped);
}
- else
+#endif // NEON
+
+ if (i < size)
{
- BinaryBroadcastFiveFold(
- params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast,
- input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, const uint8_t *, const uint8_t *,
- uint8_t *)>(AddElementwiseQuant8),
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, uint8_t, const uint8_t *,
- uint8_t *)>(AddScalarBroadcastQuant8));
+ // Process broadcast scalar.
+ const int32_t input1_val = params.input1_offset + input1_data;
+ const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+ const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
+
+ for (; i < size; ++i)
+ {
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+ const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+ const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ raw_sum, params.output_multiplier, params.output_shift) +
+ params.output_offset;
+ const int32_t clamped_output = std::min(
+ params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output));
+ output_data[i] = static_cast<int8_t>(clamped_output);
+ }
+ }
+}
+
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+BroadcastAddDispatch(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
+{
+ if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast)
+ {
+ const std::function<T(const BinaryArithmeticOpParam &, const T &, const T &)> fn =
+ [](const BinaryArithmeticOpParam ¶ms, const T &a, const T &b) {
+ return static_cast<T>(quant8_sum(params, a, b));
+ };
+ reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data, fn);
+ return;
}
+
+ BinaryBroadcastFiveFold(
+ params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, const T *, const T *, T *)>(
+ AddElementwise),
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, T, const T *, T *)>(
+ AddScalarBroadcast));
}
inline void BroadcastAddDispatch(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
}
}
-inline int32_t quant8_mul(const BinaryArithmeticOpParam ¶ms, const uint8_t input1_data,
- const uint8_t input2_data)
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value, int32_t>
+quant8_mul(const BinaryArithmeticOpParam ¶ms, const T input1_data, const T input2_data)
{
const int32_t input1_val = params.input1_offset + input1_data;
const int32_t input2_val = params.input2_offset + input2_data;
return clamped_output;
}
-inline void MulElementwiseQuant8(int size, const BinaryArithmeticOpParam ¶ms,
- const uint8_t *input1_data, const uint8_t *input2_data,
- uint8_t *output_data)
+inline void MulElementwise(int size, const BinaryArithmeticOpParam ¶ms,
+ const uint8_t *input1_data, const uint8_t *input2_data,
+ uint8_t *output_data)
{
int i = 0;
}
}
-inline void MulQuant8(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
- const uint8_t *input1_data, const Shape &input2_shape,
- const uint8_t *input2_data, const Shape &output_shape, uint8_t *output_data)
+inline void MulElementwise(int size, const BinaryArithmeticOpParam ¶ms,
+ const int8_t *input1_data, const int8_t *input2_data,
+ int8_t *output_data)
+{
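+  // int8 variant of MulElementwise: widen to int16, add the input offsets,
+  // multiply pairwise into int32, then rescale with the output multiplier and
+  // a split left/right output shift before narrowing, offsetting and clamping.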
+ int i = 0;
+#ifdef USE_NEON
+ const int16x8_t input1_offset_vector = vdupq_n_s16(params.input1_offset);
+ const int16x8_t input2_offset_vector = vdupq_n_s16(params.input2_offset);
+ const int16x8_t output_offset_vector = vdupq_n_s16(params.output_offset);
+ const auto output_activation_min_vector = vdupq_n_s8(params.quantized_activation_min);
+ const auto output_activation_max_vector = vdupq_n_s8(params.quantized_activation_max);
+ const int left_shift = std::max(0, params.output_shift);
+ const int right_shift = std::max(0, -params.output_shift);
+ const int32x4_t left_shift_vec = vdupq_n_s32(left_shift);
+ for (; i <= size - 16; i += 16)
+ {
+ // We load / store 16 at a time, multiplying as four sets of 4 int32s.
+ const int8x16_t input1_val_original = vld1q_s8(input1_data + i);
+ const int8x16_t input2_val_original = vld1q_s8(input2_data + i);
+
+ const int16x8_t input1_val_s16_high = vmovl_s8(vget_high_s8(input1_val_original));
+ const int16x8_t input1_val_s16_low = vmovl_s8(vget_low_s8(input1_val_original));
+
+ const int16x8_t input2_val_s16_high = vmovl_s8(vget_high_s8(input2_val_original));
+ const int16x8_t input2_val_s16_low = vmovl_s8(vget_low_s8(input2_val_original));
+ const int16x8_t input1_val_high = vaddq_s16(input1_val_s16_high, input1_offset_vector);
+ const int16x8_t input2_val_high = vaddq_s16(input2_val_s16_high, input2_offset_vector);
+ const int16x8_t input1_val_low = vaddq_s16(input1_val_s16_low, input1_offset_vector);
+ const int16x8_t input2_val_low = vaddq_s16(input2_val_s16_low, input2_offset_vector);
+ const int16x4_t input1_val_high_high = vget_high_s16(input1_val_high);
+ const int16x4_t input1_val_high_low = vget_low_s16(input1_val_high);
+ const int16x4_t input1_val_low_high = vget_high_s16(input1_val_low);
+ const int16x4_t input1_val_low_low = vget_low_s16(input1_val_low);
+ const int16x4_t input2_val_high_high = vget_high_s16(input2_val_high);
+ const int16x4_t input2_val_high_low = vget_low_s16(input2_val_high);
+ const int16x4_t input2_val_low_high = vget_high_s16(input2_val_low);
+ const int16x4_t input2_val_low_low = vget_low_s16(input2_val_low);
+
+ auto p1 = vmull_s16(input2_val_high_high, input1_val_high_high);
+ auto p2 = vmull_s16(input2_val_high_low, input1_val_high_low);
+ auto p3 = vmull_s16(input2_val_low_high, input1_val_low_high);
+ auto p4 = vmull_s16(input2_val_low_low, input1_val_low_low);
+
+ p1 = vshlq_s32(p1, left_shift_vec);
+ p2 = vshlq_s32(p2, left_shift_vec);
+ p3 = vshlq_s32(p3, left_shift_vec);
+ p4 = vshlq_s32(p4, left_shift_vec);
+
+ p1 = vqrdmulhq_n_s32(p1, params.output_multiplier);
+ p2 = vqrdmulhq_n_s32(p2, params.output_multiplier);
+ p3 = vqrdmulhq_n_s32(p3, params.output_multiplier);
+ p4 = vqrdmulhq_n_s32(p4, params.output_multiplier);
+ using gemmlowp::RoundingDivideByPOT;
+ p1 = RoundingDivideByPOT(p1, right_shift);
+ p2 = RoundingDivideByPOT(p2, right_shift);
+ p3 = RoundingDivideByPOT(p3, right_shift);
+ p4 = RoundingDivideByPOT(p4, right_shift);
+
+ const auto p1_narrowed = vqmovn_s32(p1);
+ const auto p2_narrowed = vqmovn_s32(p2);
+ const auto p3_narrowed = vqmovn_s32(p3);
+ const auto p4_narrowed = vqmovn_s32(p4);
+
+ const int16x8_t p_part1 =
+ vaddq_s16(vcombine_s16(p2_narrowed, p1_narrowed), output_offset_vector);
+ const int16x8_t p_part2 =
+ vaddq_s16(vcombine_s16(p4_narrowed, p3_narrowed), output_offset_vector);
+ const int8x16_t p = vcombine_s8(vqmovn_s16(p_part2), vqmovn_s16(p_part1));
+
+ const auto clamped =
+ vmaxq_s8(output_activation_min_vector, vminq_s8(output_activation_max_vector, p));
+ vst1q_s8(output_data + i, clamped);
+ }
+#endif // NEON
+
+ for (; i < size; ++i)
+ {
+ const int32_t input1_val = params.input1_offset + input1_data[i];
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t unclamped_result =
+ params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
+ params.output_multiplier,
+ params.output_shift);
+ const int32_t clamped_output = std::min(
+ params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
+ output_data[i] = static_cast<int8_t>(clamped_output);
+ }
+}
+
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+Mul(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data)
{
const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
- MulElementwiseQuant8(flat_size, params, input1_data, input2_data, output_data);
+ MulElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void Mul(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
(*implFuncs.first)(flat_size, params, input1_data, input2_data, output_data);
}
-inline void MulSimpleBroadcastQuant8(int size, const BinaryArithmeticOpParam ¶ms,
- const uint8_t broadcast_value, const uint8_t *input2_data,
- uint8_t *output_data)
+inline void MulSimpleBroadcast(int size, const BinaryArithmeticOpParam ¶ms,
+ const uint8_t broadcast_value, const uint8_t *input2_data,
+ uint8_t *output_data)
{
int i = 0;
int32_t clamped_output;
}
}
-inline void BroadcastMulDispatchQuant8(const BinaryArithmeticOpParam ¶ms,
- const Shape &input1_shape, const uint8_t *input1_data,
- const Shape &input2_shape, const uint8_t *input2_data,
- const Shape &output_shape, uint8_t *output_data)
+// Broadcast mul that can often be used for inner loop of broadcast Mul.
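+// The broadcast scalar is offset once outside the loop; each 16-lane iteration
+// only loads, offsets and multiplies input2 against it with vmull_n_s16.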
+inline void MulSimpleBroadcast(int size, const BinaryArithmeticOpParam ¶ms,
+ const int8_t broadcast_value, const int8_t *input2_data,
+ int8_t *output_data)
+{
+ const int16_t input1_val = params.input1_offset + broadcast_value;
+
+ int i = 0;
+#ifdef USE_NEON
+ const auto input2_offset_vector = vdupq_n_s16(params.input2_offset);
+ const auto output_offset_vector = vdupq_n_s16(params.output_offset);
+ const auto output_activation_min_vector = vdupq_n_s8(params.quantized_activation_min);
+ const auto output_activation_max_vector = vdupq_n_s8(params.quantized_activation_max);
+ const int left_shift = std::max(0, params.output_shift);
+ const int right_shift = std::max(0, -params.output_shift);
+ const int32x4_t left_shift_vec = vdupq_n_s32(left_shift);
+ for (; i <= size - 16; i += 16)
+ {
+ // We load / store 16 at a time, multiplying as four sets of 4 int32s.
+ const auto input2_val_original = vld1q_s8(input2_data + i);
+ const auto input2_val_s16_high = vmovl_s8(vget_high_s8(input2_val_original));
+ const auto input2_val_s16_low = vmovl_s8(vget_low_s8(input2_val_original));
+
+ const auto input2_val_high = vaddq_s16(input2_val_s16_high, input2_offset_vector);
+ const auto input2_val_low = vaddq_s16(input2_val_s16_low, input2_offset_vector);
+
+ const auto input2_val_low_low = vget_low_s16(input2_val_low);
+ const auto input2_val_low_high = vget_high_s16(input2_val_low);
+ const auto input2_val_high_low = vget_low_s16(input2_val_high);
+ const auto input2_val_high_high = vget_high_s16(input2_val_high);
+
+ auto p1 = vmull_n_s16(input2_val_high_high, input1_val);
+ auto p2 = vmull_n_s16(input2_val_high_low, input1_val);
+ auto p3 = vmull_n_s16(input2_val_low_high, input1_val);
+ auto p4 = vmull_n_s16(input2_val_low_low, input1_val);
+
+ p1 = vshlq_s32(p1, left_shift_vec);
+ p2 = vshlq_s32(p2, left_shift_vec);
+ p3 = vshlq_s32(p3, left_shift_vec);
+ p4 = vshlq_s32(p4, left_shift_vec);
+
+ p1 = vqrdmulhq_n_s32(p1, params.output_multiplier);
+ p2 = vqrdmulhq_n_s32(p2, params.output_multiplier);
+ p3 = vqrdmulhq_n_s32(p3, params.output_multiplier);
+ p4 = vqrdmulhq_n_s32(p4, params.output_multiplier);
+ using gemmlowp::RoundingDivideByPOT;
+ p1 = RoundingDivideByPOT(p1, right_shift);
+ p2 = RoundingDivideByPOT(p2, right_shift);
+ p3 = RoundingDivideByPOT(p3, right_shift);
+ p4 = RoundingDivideByPOT(p4, right_shift);
+
+ const auto p1_narrowed = vqmovn_s32(p1);
+ const auto p2_narrowed = vqmovn_s32(p2);
+ const auto p3_narrowed = vqmovn_s32(p3);
+ const auto p4_narrowed = vqmovn_s32(p4);
+
+ const int16x8_t p_part1 =
+ vaddq_s16(vcombine_s16(p2_narrowed, p1_narrowed), output_offset_vector);
+ const int16x8_t p_part2 =
+ vaddq_s16(vcombine_s16(p4_narrowed, p3_narrowed), output_offset_vector);
+ const int8x16_t p = vcombine_s8(vqmovn_s16(p_part2), vqmovn_s16(p_part1));
+
+ const auto clamped =
+ vmaxq_s8(output_activation_min_vector, vminq_s8(output_activation_max_vector, p));
+ vst1q_s8(output_data + i, clamped);
+ }
+#endif // NEON
+
+ for (; i < size; ++i)
+ {
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t unclamped_result =
+ params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
+ params.output_multiplier,
+ params.output_shift);
+ const int32_t clamped_output = std::min(
+ params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
+ output_data[i] = static_cast<int8_t>(clamped_output);
+ }
+}
+
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+BroadcastMulDispatch(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast)
{
- const std::function<uint8_t(const BinaryArithmeticOpParam &, const uint8_t &, const uint8_t &)>
- fn =
- [](const BinaryArithmeticOpParam ¶ms, const uint8_t &a, const uint8_t &b) -> uint8_t {
- return static_cast<uint8_t>(quant8_mul(params, a, b));
- };
- reference::BroadcastBinaryArithmeticOpSlowQuant8(
- params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, fn);
+ const std::function<T(const BinaryArithmeticOpParam &, const T &, const T &)> fn =
+ [](const BinaryArithmeticOpParam ¶ms, const T &a, const T &b) {
+ return static_cast<T>(quant8_mul(params, a, b));
+ };
+ reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data, fn);
return;
}
BinaryBroadcastFiveFold(
- params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast,
- input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, const uint8_t *, const uint8_t *,
- uint8_t *)>(MulElementwiseQuant8),
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, uint8_t, const uint8_t *,
- uint8_t *)>(MulSimpleBroadcastQuant8));
+ params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, const T *, const T *, T *)>(
+ MulElementwise),
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, T, const T *, T *)>(
+ MulSimpleBroadcast));
}
inline void BroadcastMulDispatch(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
return;
}
auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncMulFloat>(params);
- BinaryBroadcastFiveFold(
- params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast,
- input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
- implFuncs.first, implFuncs.second);
+ BinaryBroadcastFiveFold(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data, implFuncs.first, implFuncs.second);
}
inline void Div(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
}
}
-} // nnfw
-} // cker
-} // optimized
+} // namespace optimized
+} // namespace cker
+} // namespace nnfw
#endif
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_INT8_H__
+#define __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_INT8_H__
+
+#include "cker/CpuBackendThreadpool.h"
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+#include "cker/neon/neon_check.h"
+#include "cker/operation/Quantize.h"
+
+#include <fixedpoint/fixedpoint.h>
+#include <public/gemmlowp.h>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace optimized_integer_ops
+{
+
+// Category of depthwise convolution output rounding.
+enum class DepthwiseConvOutputRounding
+{
+ kNone = 0, // Invalid: specific method must be specified.
+ kAwayFromZero, // Original method: exact halves rounded away from zero.
+ kUpward, // Halves towards +infinity: adds 0.5 before truncate.
+ // This is where a future kNearestEven would be placed.
+};
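+
+// Example (illustrative): for a raw scaled value of +2.5 both modes give 3;
+// for -2.5, kAwayFromZero gives -3 while kUpward (add 0.5, then truncate
+// towards -infinity) gives -2. The two modes only differ on negative exact
+// halves.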
+
+// Category of depthwise convolution depth multiplication.
+enum class DepthwiseConvDepthMultiplication
+{
+ kNoMultiplication = 0, // Depth multiplier = 1.
+ kUnitInputDepth, // Input depth = 1, output depth = depth multiplier.
+};
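+
+// In both cases output_depth == input_depth * depth_multiplier; e.g. an input
+// depth of 1 with a depth multiplier of 8 produces 8 output channels.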
+
+namespace depthwise_conv
+{
+
+// Implementation of quantized DepthwiseConv
+
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+struct QuantizedDepthwiseConvKernel
+{
+};
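+
+// The primary template above is deliberately left empty: only the
+// (kAllowStrided, kFixedInputDepth, kFixedDepthMultiplier) combinations
+// specialized below provide a Run() method, and shapes that match none of
+// them are expected to fall back to the generic row accumulator further down.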
+
+#ifdef USE_NEON
+template <> struct QuantizedDepthwiseConvKernel<true, 8, 2>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8x2_t filter_s8;
+ filter_s8.val[0] = vld1_s8(filter_ptr);
+ filter_s8.val[1] = vld1_s8(filter_ptr + 8);
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8.val[i]);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4x2_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8);
+ }
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += input_ptr_increment;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[0].val[i] =
+ vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]), vget_low_s16(input_dup2.val[i]));
+ acc[1].val[i] =
+ vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]), vget_high_s16(input_dup2.val[i]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]);
+ vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 8, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr);
+ const int16x8_t filter = vmovl_s8(filter_s8);
+
+ int outp = 0;
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input_s8[i] = vld1_s8(input_ptr + 8 * i);
+ }
+ input_ptr += 16;
+ int16x8_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vmovl_s8(input_s8[i]);
+ }
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset));
+ }
+ // Multiply-accumulate.
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input[0]));
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input[0]));
+ acc[2] = vmlal_s16(acc[2], vget_low_s16(filter), vget_low_s16(input[1]));
+ acc[3] = vmlal_s16(acc[3], vget_high_s16(filter), vget_high_s16(input[1]));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle 1 output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[2];
+ acc[0] = vld1q_s32(acc_buffer_ptr);
+ acc[1] = vld1q_s32(acc_buffer_ptr + 4);
+
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Multiply-accumulate.
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input));
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input));
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc[0]);
+ vst1q_s32(acc_buffer_ptr + 4, acc[1]);
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 4, 2>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr);
+ const int16x8_t filter = vmovl_s8(filter_s8);
+
+ int outp = 0;
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[2 * i + 0] =
+ vmlal_s16(acc[2 * i + 0], vget_low_s16(filter), vget_low_s16(input_dup2.val[i]));
+ acc[2 * i + 1] =
+ vmlal_s16(acc[2 * i + 1], vget_high_s16(filter), vget_high_s16(input_dup2.val[i]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x4x2_t input_dup2 = vzip_s16(input, input);
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), input_dup2.val[0]);
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), input_dup2.val[1]);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 2, 8>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr + 8 * i);
+ filter[i] = vmovl_s8(filter_s8);
+ }
+ int outp = 0;
+ // Handle two output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[8];
+ for (int i = 0; i < 8; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Multiply-accumulate.
+ acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0);
+ acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 0);
+ acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 1);
+ acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 1);
+ acc[4] = vmlal_lane_s16(acc[4], vget_low_s16(filter[0]), input, 2);
+ acc[5] = vmlal_lane_s16(acc[5], vget_high_s16(filter[0]), input, 2);
+ acc[6] = vmlal_lane_s16(acc[6], vget_low_s16(filter[1]), input, 3);
+ acc[7] = vmlal_lane_s16(acc[7], vget_high_s16(filter[1]), input, 3);
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 8; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 32;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_ptr += 2;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0);
+ acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 0);
+ acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 1);
+ acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 1);
+
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 2, 2>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[2], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[3], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 4 output pixels at a time.
+ for (; outp <= num_output_pixels - 4; outp += 4)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input_dup2.val[0]));
+ acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input_dup2.val[0]));
+ acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input_dup2.val[1]));
+ acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input_dup2.val[1]));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_ptr += 2;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x4_t input_dup2 = vzip_s16(input, input).val[0];
+ // Multiply-accumulate
+ acc = vmlal_s16(acc, filter, input_dup2);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 2, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 8 output pixels at a time.
+ for (; outp <= num_output_pixels - 8; outp += 8)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input_s8[i] = vld1_s8(input_ptr + 8 * i);
+ }
+ input_ptr += 16;
+ int16x8_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vmovl_s8(input_s8[i]);
+ }
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset));
+ }
+
+ // Multiply-accumulate.
+ acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input[0]));
+ acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input[0]));
+ acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input[1]));
+ acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input[1]));
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle 4 output pixels at a time.
+ for (; outp <= num_output_pixels - 4; outp += 4)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input));
+ acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input));
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer.
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ // Handle 1 output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x2_t acc = vld1_s32(acc_buffer_ptr);
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_ptr += 2;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc = vget_low_s32(vmlal_s16(vcombine_s32(acc, acc), filter, input));
+ // Store the accumulators back to acc_buffer.
+ vst1_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 2;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 1, 2>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 8 output pixels at a time.
+ for (; outp <= num_output_pixels - 8; outp += 8)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input_dup2.val[0]));
+ acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input_dup2.val[0]));
+ acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input_dup2.val[1]));
+ acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input_dup2.val[1]));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x2_t acc = vld1_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+ const uint32_t input = *input_ptr++ + input_offset;
+
+ // Multiply-accumulate
+ acc = vget_low_s32(vmlal_n_s16(vcombine_s32(acc, acc), filter, input));
+ // Store the accumulators back to acc_buffer
+ vst1_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 2;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 1, 4>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[2], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[3], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 8 output pixels at a time.
+ for (; outp <= num_output_pixels - 8; outp += 8)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[8];
+ for (int i = 0; i < 8; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+
+ // Multiply-accumulate
+ acc[0] = vmlal_lane_s16(acc[0], filter, vget_low_s16(input), 0);
+ acc[1] = vmlal_lane_s16(acc[1], filter, vget_low_s16(input), 1);
+ acc[2] = vmlal_lane_s16(acc[2], filter, vget_low_s16(input), 2);
+ acc[3] = vmlal_lane_s16(acc[3], filter, vget_low_s16(input), 3);
+ acc[4] = vmlal_lane_s16(acc[4], filter, vget_high_s16(input), 0);
+ acc[5] = vmlal_lane_s16(acc[5], filter, vget_high_s16(input), 1);
+ acc[6] = vmlal_lane_s16(acc[6], filter, vget_high_s16(input), 2);
+ acc[7] = vmlal_lane_s16(acc[7], filter, vget_high_s16(input), 3);
+
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 8; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 32;
+ }
+ // Handle 4 output pixels at a time.
+ for (; outp <= num_output_pixels - 4; outp += 4)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate
+ acc[0] = vmlal_lane_s16(acc[0], filter, input, 0);
+ acc[1] = vmlal_lane_s16(acc[1], filter, input, 1);
+ acc[2] = vmlal_lane_s16(acc[2], filter, input, 2);
+ acc[3] = vmlal_lane_s16(acc[3], filter, input, 3);
+
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+ const uint32_t input = *input_ptr++ + input_offset;
+
+ // Multiply-accumulate
+ acc = vmlal_n_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 4, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[2], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[3], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 4 output pixels at a time.
+ for (; outp <= num_output_pixels - 4; outp += 4)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int16x8_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ const int8x8_t input_s8 = vld1_s8(input_ptr + 8 * i);
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ input[i] = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ }
+ input_ptr += 16;
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], filter, vget_low_s16(input[i]));
+ acc[2 * i + 1] = vmlal_s16(acc[2 * i + 1], filter, vget_high_s16(input[i]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc;
+ acc = vld1q_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Multiply-accumulate
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 4, 4>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr + 8 * i);
+ filter[i] = vmovl_s8(filter_s8);
+ }
+
+ int outp = 0;
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[8];
+ for (int i = 0; i < 8; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+
+ // Multiply-accumulate
+ acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), vget_low_s16(input), 0);
+ acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), vget_low_s16(input), 1);
+ acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), vget_low_s16(input), 2);
+ acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), vget_low_s16(input), 3);
+ acc[4] = vmlal_lane_s16(acc[4], vget_low_s16(filter[0]), vget_high_s16(input), 0);
+ acc[5] = vmlal_lane_s16(acc[5], vget_high_s16(filter[0]), vget_high_s16(input), 1);
+ acc[6] = vmlal_lane_s16(acc[6], vget_low_s16(filter[1]), vget_high_s16(input), 2);
+ acc[7] = vmlal_lane_s16(acc[7], vget_high_s16(filter[1]), vget_high_s16(input), 3);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 8; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 32;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate
+ acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0);
+ acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 1);
+ acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 2);
+ acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 3);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 0, 3>
+{
+ static void Run(int num_output_pixels, int input_depth, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // We will have to duplicate bytes in a NEON register, 3-fold.
+ // We will do that by register-level table-look-up using VTBL instructions.
+ // Here we prepare the registers containing the table-lookup indices.
+ static const int8_t dup3_indices_array[3][8] = {
+ {0, 0, 0, 1, 1, 1, 2, 2}, {2, 3, 3, 3, 4, 4, 4, 5}, {5, 5, 6, 6, 6, 7, 7, 7}};
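+ // Illustratively, an 8-byte input {a,b,c,d,e,f,g,h} is expanded via these
+ // tables into {a,a,a,b,b,b,c,c}, {c,d,d,d,e,e,e,f} and {f,f,g,g,g,h,h,h}.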
+ int8x8_t dup3_indices[3];
+ for (int i = 0; i < 3; i++)
+ {
+ dup3_indices[i] = vld1_s8(dup3_indices_array[i]);
+ }
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const int8_t *local_filter_ptr = filter_ptr;
+ const int8_t *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 8 input channels at a time.
+ for (; ic <= input_depth - 8; ic += 8)
+ {
+ // Load the filters.
+ int16x8_t filter[3];
+ int8x8x3_t filter_s8;
+ filter_s8.val[0] = vld1_s8(local_filter_ptr);
+ filter_s8.val[1] = vld1_s8(local_filter_ptr + 8);
+ filter_s8.val[2] = vld1_s8(local_filter_ptr + 16);
+ local_filter_ptr += 24;
+ for (int i = 0; i < 3; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8.val[i]);
+ }
+ // Load the inputs, duplicate 3-fold, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(local_input_ptr);
+ local_input_ptr += 8;
+
+ int8x8_t input_s8_dup3[3];
+ for (int i = 0; i < 3; i++)
+ {
+ input_s8_dup3[i] = vtbl1_s8(input_s8, dup3_indices[i]);
+ }
+ int16x8_t input_dup3[3];
+ for (int i = 0; i < 3; i++)
+ {
+ const int16x8_t input_s16_dup3 = vmovl_s8(input_s8_dup3[i]);
+ input_dup3[i] = vaddq_s16(input_s16_dup3, vdupq_n_s16(input_offset));
+ }
+ // Load the accumulators from acc_buffer
+ int32x4x3_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8);
+ acc[i].val[2] = vld1q_s32(acc_buffer_ptr + 4 * i + 16);
+ }
+ // Multiply-accumulate
+ for (int j = 0; j < 3; j++)
+ {
+ acc[0].val[j] =
+ vmlal_s16(acc[0].val[j], vget_low_s16(input_dup3[j]), vget_low_s16(filter[j]));
+ acc[1].val[j] =
+ vmlal_s16(acc[1].val[j], vget_high_s16(input_dup3[j]), vget_high_s16(filter[j]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]);
+ vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]);
+ vst1q_s32(acc_buffer_ptr + 4 * i + 16, acc[i].val[2]);
+ }
+ acc_buffer_ptr += 24;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ const int16_t input_val = *local_input_ptr++ + input_offset;
+ for (int i = 0; i < 3; i++)
+ {
+ *acc_buffer_ptr++ += static_cast<int32_t>(local_filter_ptr[i]) * input_val;
+ }
+ local_filter_ptr += 3;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 0, 2>
+{
+ static void Run(int num_output_pixels, int input_depth, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const int8_t *local_filter_ptr = filter_ptr;
+ const int8_t *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 8 input channels at a time.
+ for (; ic <= input_depth - 8; ic += 8)
+ {
+ // Load the filters.
+ int16x8_t filter[2];
+ int8x8x2_t filter_s8;
+ filter_s8.val[0] = vld1_s8(local_filter_ptr);
+ filter_s8.val[1] = vld1_s8(local_filter_ptr + 8);
+ local_filter_ptr += 16;
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8.val[i]);
+ }
+ // Load the inputs, add input_offset, duplicate 2-fold.
+ const int8x8_t input_s8 = vld1_s8(local_input_ptr);
+ local_input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Load the accumulators from acc_buffer.
+ int32x4x2_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8);
+ }
+ // Multiply-accumulate.
+ for (int j = 0; j < 2; j++)
+ {
+ acc[0].val[j] =
+ vmlal_s16(acc[0].val[j], vget_low_s16(filter[j]), vget_low_s16(input_dup2.val[j]));
+ acc[1].val[j] =
+ vmlal_s16(acc[1].val[j], vget_high_s16(filter[j]), vget_high_s16(input_dup2.val[j]));
+ }
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]);
+ vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ // Load the inputs.
+ const int16_t input_val = *local_input_ptr++ + input_offset;
+ for (int i = 0; i < 2; i++)
+ {
+ *acc_buffer_ptr++ += static_cast<int32_t>(local_filter_ptr[i]) * input_val;
+ }
+ local_filter_ptr += 2;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 0, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const int8_t *local_filter_ptr = filter_ptr;
+ const int8_t *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 16 input channels at a time.
+ for (; ic <= input_depth - 16; ic += 16)
+ {
+ // Load the filters.
+ int8x8_t filter_s8_0 = vld1_s8(local_filter_ptr + 8 * 0);
+ int8x8_t filter_s8_1 = vld1_s8(local_filter_ptr + 8 * 1);
+ local_filter_ptr += 16;
+ int16x8_t filter_0 = vmovl_s8(filter_s8_0);
+ int16x8_t filter_1 = vmovl_s8(filter_s8_1);
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8_0 = vld1_s8(local_input_ptr + 8 * 0);
+ int8x8_t input_s8_1 = vld1_s8(local_input_ptr + 8 * 1);
+ local_input_ptr += 16;
+ int16x8_t input_0 = vmovl_s8(input_s8_0);
+ int16x8_t input_1 = vmovl_s8(input_s8_1);
+ input_0 = vaddq_s16(input_0, vdupq_n_s16(input_offset));
+ input_1 = vaddq_s16(input_1, vdupq_n_s16(input_offset));
+ // Load the accumulators from acc_buffer
+ int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+ int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+ int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+ int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3);
+ acc_0 = vmlal_s16(acc_0, vget_low_s16(input_0), vget_low_s16(filter_0));
+ acc_1 = vmlal_s16(acc_1, vget_high_s16(input_0), vget_high_s16(filter_0));
+ acc_2 = vmlal_s16(acc_2, vget_low_s16(input_1), vget_low_s16(filter_1));
+ acc_3 = vmlal_s16(acc_3, vget_high_s16(input_1), vget_high_s16(filter_1));
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3);
+ acc_buffer_ptr += 16;
+ }
+ // Handle 8 input channels at a time.
+ for (; ic <= input_depth - 8; ic += 8)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(local_filter_ptr);
+ local_filter_ptr += 8;
+ const int16x8_t filter = vmovl_s8(filter_s8);
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(local_input_ptr);
+ local_input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(input), vget_low_s16(filter));
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(input), vget_high_s16(filter));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ const int16_t input_val = *local_input_ptr++ + input_offset;
+ const int16_t filter_val = *local_filter_ptr++;
+ *acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 16, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter_s8[i] = vld1_s8(filter_ptr + 8 * i);
+ }
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8[i]);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input_s8[i] = vld1_s8(input_ptr + 8 * i);
+ }
+ input_ptr += input_ptr_increment;
+ int16x8_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vmovl_s8(input_s8[i]);
+ }
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset));
+ }
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(input[i]), vget_low_s16(filter[i]));
+ acc[2 * i + 1] =
+ vmlal_s16(acc[2 * i + 1], vget_high_s16(input[i]), vget_high_s16(filter[i]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 8, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr);
+ const int16x8_t filter = vmovl_s8(filter_s8);
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(input), vget_low_s16(filter));
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(input), vget_high_s16(filter));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 16>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter_s8[i] = vld1_s8(filter_ptr + 8 * i);
+ }
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8[i]);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ int8_t input_s8 = *input_ptr;
+ input_ptr += input_ptr_increment;
+ int16_t input = static_cast<int16_t>(input_s8 + input_offset);
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[2 * i + 0] = vmlal_n_s16(acc[2 * i + 0], vget_low_s16(filter[i]), input);
+ acc[2 * i + 1] = vmlal_n_s16(acc[2 * i + 1], vget_high_s16(filter[i]), input);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 32>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8_0 = vld1_s8(filter_ptr + 8 * 0);
+ int8x8_t filter_s8_1 = vld1_s8(filter_ptr + 8 * 1);
+ int8x8_t filter_s8_2 = vld1_s8(filter_ptr + 8 * 2);
+ int8x8_t filter_s8_3 = vld1_s8(filter_ptr + 8 * 3);
+ int16x8_t filter_0 = vmovl_s8(filter_s8_0);
+ int16x8_t filter_1 = vmovl_s8(filter_s8_1);
+ int16x8_t filter_2 = vmovl_s8(filter_s8_2);
+ int16x8_t filter_3 = vmovl_s8(filter_s8_3);
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ int8_t input_s8 = *input_ptr;
+ input_ptr += input_ptr_increment;
+ int16_t input = static_cast<int16_t>(input_s8 + input_offset);
+ // Load the accumulators from acc_buffer
+ int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+ int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+ int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+ int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3);
+ int32x4_t acc_4 = vld1q_s32(acc_buffer_ptr + 4 * 4);
+ int32x4_t acc_5 = vld1q_s32(acc_buffer_ptr + 4 * 5);
+ int32x4_t acc_6 = vld1q_s32(acc_buffer_ptr + 4 * 6);
+ int32x4_t acc_7 = vld1q_s32(acc_buffer_ptr + 4 * 7);
+ // Multiply-accumulate
+ acc_0 = vmlal_n_s16(acc_0, vget_low_s16(filter_0), input);
+ acc_1 = vmlal_n_s16(acc_1, vget_high_s16(filter_0), input);
+ acc_2 = vmlal_n_s16(acc_2, vget_low_s16(filter_1), input);
+ acc_3 = vmlal_n_s16(acc_3, vget_high_s16(filter_1), input);
+ acc_4 = vmlal_n_s16(acc_4, vget_low_s16(filter_2), input);
+ acc_5 = vmlal_n_s16(acc_5, vget_high_s16(filter_2), input);
+ acc_6 = vmlal_n_s16(acc_6, vget_low_s16(filter_3), input);
+ acc_7 = vmlal_n_s16(acc_7, vget_high_s16(filter_3), input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3);
+ vst1q_s32(acc_buffer_ptr + 4 * 4, acc_4);
+ vst1q_s32(acc_buffer_ptr + 4 * 5, acc_5);
+ vst1q_s32(acc_buffer_ptr + 4 * 6, acc_6);
+ vst1q_s32(acc_buffer_ptr + 4 * 7, acc_7);
+ acc_buffer_ptr += 32;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 20>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ // NEON wants to load 8 bytes at a time, but 20 is not divisible by 8.
+ // We load the first 16 bytes into filter_s8_{0,1} as usual.
+ // Then we load the 8 last bytes into filter_s8_x (x for 'extra').
+ // This is redundant: the first 4 bytes of filter_s8_x are the same
+ // as the last 4 bytes of filter_s8_1.
+ int8x8_t filter_s8_0 = vld1_s8(filter_ptr + 8 * 0);
+ int8x8_t filter_s8_1 = vld1_s8(filter_ptr + 8 * 1);
+ int8x8_t filter_s8_x = vld1_s8(filter_ptr + 8 * 1 + 4);
+ int16x8_t filter_0 = vmovl_s8(filter_s8_0);
+ int16x8_t filter_1 = vmovl_s8(filter_s8_1);
+ int16x8_t filter_x = vmovl_s8(filter_s8_x);
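+ // Byte layout (for reference): filter_0 covers bytes 0-7, filter_1 bytes
+ // 8-15 and filter_x bytes 12-19; only the upper half of filter_x
+ // (bytes 16-19) is consumed by the accumulation below.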
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ int8_t input_s8 = *input_ptr;
+ input_ptr += input_ptr_increment;
+ int16_t input = static_cast<int16_t>(input_s8 + input_offset);
+ // Load the accumulators from acc_buffer
+ int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+ int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+ int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+ int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3);
+ int32x4_t acc_4 = vld1q_s32(acc_buffer_ptr + 4 * 4);
+ // Multiply-accumulate
+ acc_0 = vmlal_n_s16(acc_0, vget_low_s16(filter_0), input);
+ acc_1 = vmlal_n_s16(acc_1, vget_high_s16(filter_0), input);
+ acc_2 = vmlal_n_s16(acc_2, vget_low_s16(filter_1), input);
+ acc_3 = vmlal_n_s16(acc_3, vget_high_s16(filter_1), input);
+ acc_4 = vmlal_n_s16(acc_4, vget_high_s16(filter_x), input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3);
+ vst1q_s32(acc_buffer_ptr + 4 * 4, acc_4);
+ acc_buffer_ptr += 20;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 8>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr);
+ const int16x8_t filter = vmovl_s8(filter_s8);
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ int8_t input_s8 = *input_ptr;
+ input_ptr += input_ptr_increment;
+ int16_t input = static_cast<int16_t>(input_s8 + input_offset);
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlal_n_s16(acc[0], vget_low_s16(filter), input);
+ acc[1] = vmlal_n_s16(acc[1], vget_high_s16(filter), input);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 2, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+ // Load the inputs, add input_offset.
+ int16x4_t input_s16 = vdup_n_s16(0);
+ input_s16 = vset_lane_s16((reinterpret_cast<const int16_t *>(input_ptr))[0], input_s16, 0);
+ input_ptr += input_ptr_increment;
+ input_s16 = vset_lane_s16((reinterpret_cast<const int16_t *>(input_ptr))[0], input_s16, 1);
+ input_ptr += input_ptr_increment;
+ input_s16 = vget_low_s16(vmovl_s8(vreinterpret_s8_s16(input_s16)));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer.
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+
+ // Handle 1 output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x2_t acc = vld1_s32(acc_buffer_ptr);
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_ptr += input_ptr_increment;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc = vget_low_s32(vmlal_s16(vcombine_s32(acc, acc), filter, input));
+ // Store the accumulators back to acc_buffer.
+ vst1_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 2;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 4, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ if (num_output_pixels <= 0)
+ {
+ return;
+ }
+
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[2], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[3], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+
+ // Handle one output pixel at a time, stopping at the second-to-last pixel:
+ // each iteration loads eight input values (vld1_s8) but only uses the first
+ // four, so the final pixel is handled separately to avoid reading past the
+ // end of the row.
+ for (; outp < num_output_pixels - 1; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc;
+ acc = vld1q_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += input_ptr_increment;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Multiply-accumulate
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+
+ // Handle the last output pixel.
+ // Load the accumulators from acc_buffer
+ int32x4_t acc;
+ acc = vld1q_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Multiply-accumulate
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 12, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8_0 = vld1_s8(filter_ptr);
+ int8x8_t filter_s8_1 = vld1_s8(filter_ptr + 4);
+ int16x8_t filter_s16_0 = vmovl_s8(filter_s8_0);
+ int16x8_t filter_s16_1 = vmovl_s8(filter_s8_1);
+ int16x4_t filter_0 = vget_low_s16(filter_s16_0);
+ int16x4_t filter_1 = vget_high_s16(filter_s16_0);
+ int16x4_t filter_2 = vget_high_s16(filter_s16_1);
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8_0 = vld1_s8(input_ptr);
+ int8x8_t input_s8_1 = vld1_s8(input_ptr + 4);
+ input_ptr += input_ptr_increment;
+ int16x8_t input_0 = vmovl_s8(input_s8_0);
+ int16x8_t input_1 = vmovl_s8(input_s8_1);
+ input_0 = vaddq_s16(input_0, vdupq_n_s16(input_offset));
+ input_1 = vaddq_s16(input_1, vdupq_n_s16(input_offset));
+
+ // Load the accumulators from acc_buffer
+ int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+ int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+ int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+
+ // Multiply-accumulate
+ acc_0 = vmlal_s16(acc_0, vget_low_s16(input_0), filter_0);
+ acc_1 = vmlal_s16(acc_1, vget_high_s16(input_0), filter_1);
+ acc_2 = vmlal_s16(acc_2, vget_high_s16(input_1), filter_2);
+
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+
+ acc_buffer_ptr += 12;
+ }
+ }
+};
+#endif
+
+// Accumulates the effect of one row of the filter on a segment of one row of
+// the output, reading the corresponding row of the input.
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor, int input_depth,
+ int input_width, const int8_t *input_data, int16_t input_offset,
+ int pad_width, int depth_multiplier, int filter_width,
+ const int8_t *filter_data, int out_x_buffer_start,
+ int out_x_buffer_end, int output_depth, int32_t *acc_buffer)
+{
+ // Consistency check parameters. This is important in particular to ensure
+ // that we keep the number of template instantiations minimal, so we don't
+ // increase binary size unnecessarily.
+ static_assert(kFixedDepthMultiplier || !kFixedInputDepth, "");
+ static_assert(kFixedInputDepth || kAllowStrided, "");
+ assert(stride == 1 || kAllowStrided);
+ if (kFixedInputDepth)
+ {
+ assert(input_depth == kFixedInputDepth);
+ }
+ if (kFixedDepthMultiplier)
+ {
+ assert(depth_multiplier == kFixedDepthMultiplier);
+ }
+ assert(output_depth == input_depth * depth_multiplier);
+ const int input_ptr_increment = stride * input_depth;
+ const int8_t *filter_base_ptr = filter_data;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ // For the current (filter_x, filter_y) point in the filter,
+ // compute the boundaries of the corresponding output row segment.
+ int out_x_loop_start_unclamped = 0;
+ int out_x_loop_end_unclamped = 0;
+ if (kAllowStrided)
+ {
+ if (stride == 2)
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + 1) / 2;
+ out_x_loop_end_unclamped = (pad_width + input_width - dilation_factor * filter_x + 1) / 2;
+ }
+ else if (stride == 4)
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + 3) / 4;
+ out_x_loop_end_unclamped = (pad_width + input_width - dilation_factor * filter_x + 3) / 4;
+ }
+ else
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + stride - 1) / stride;
+ out_x_loop_end_unclamped =
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride;
+ }
+ }
+ else
+ {
+ out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x;
+ out_x_loop_end_unclamped = pad_width + input_width - dilation_factor * filter_x;
+ }
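+ // Worked example (illustrative): stride 2, pad_width 1, dilation 1,
+ // filter_x 0, input_width 5 gives start = (1 + 1) / 2 = 1 and
+ // end = (1 + 5 + 1) / 2 = 3, i.e. out_x in {1, 2}, whose input positions
+ // out_x * 2 - 1 are 1 and 3, both inside [0, input_width).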
+ // The kernel will have to iterate over the segment of the
+ // output row that starts at out_x_loop_start and ends at out_x_loop_end.
+ const int out_x_loop_start = std::max(out_x_buffer_start, out_x_loop_start_unclamped);
+ const int out_x_loop_end = std::min(out_x_buffer_end, out_x_loop_end_unclamped);
+
+ int32_t *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+ const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
+ const int8_t *input_ptr = input_data + in_x_origin * input_depth;
+ const int num_output_pixels = out_x_loop_end - out_x_loop_start;
+ QuantizedDepthwiseConvKernel<kAllowStrided, kFixedInputDepth, kFixedDepthMultiplier>::Run(
+ num_output_pixels, input_depth, depth_multiplier, input_ptr, input_offset,
+ input_ptr_increment, filter_base_ptr, acc_buffer_ptr);
+ filter_base_ptr += output_depth;
+ }
+}
+
+// Generic fallback of QuantizedDepthwiseConvAccumRow: portable, non-templatized.
+inline void QuantizedDepthwiseConvAccumRowGeneric(int stride, int dilation_factor, int input_depth,
+ int input_width, const int8_t *input_data,
+ int16_t input_offset, int pad_width,
+ int depth_multiplier, int filter_width,
+ const int8_t *filter_data, int out_x_buffer_start,
+ int out_x_buffer_end, int output_depth,
+ int32_t *acc_buffer)
+{
+ const int8_t *filter_base_ptr = filter_data;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int out_x_loop_start =
+ std::max(out_x_buffer_start, (pad_width - dilation_factor * filter_x + stride - 1) / stride);
+ const int out_x_loop_end =
+ std::min(out_x_buffer_end,
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride);
+
+ int32_t *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+ const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
+ const int8_t *input_ptr = input_data + in_x_origin * input_depth;
+ const int input_ptr_increment = (stride - 1) * input_depth;
+ for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++)
+ {
+ const int8_t *filter_ptr = filter_base_ptr;
+ for (int ic = 0; ic < input_depth; ++ic)
+ {
+ const int16_t input_val = *input_ptr++ + input_offset;
+ for (int m = 0; m < depth_multiplier; m++)
+ {
+ const int16_t filter_val = *filter_ptr++;
+ *acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
+ }
+ }
+ input_ptr += input_ptr_increment;
+ }
+ filter_base_ptr += output_depth;
+ }
+}
+
+// Initializes the accumulator buffer with bias values.
+inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth,
+ const int32_t *bias_data, int32_t *acc_buffer)
+{
+ int i = 0;
+#ifdef USE_NEON
+ if (output_depth == 1)
+ {
+ const int32x4_t b = vdupq_n_s32(bias_data[0]);
+ for (; i <= num_output_pixels - 16; i += 16)
+ {
+ vst1q_s32(acc_buffer + i + 0, b);
+ vst1q_s32(acc_buffer + i + 4, b);
+ vst1q_s32(acc_buffer + i + 8, b);
+ vst1q_s32(acc_buffer + i + 12, b);
+ }
+ for (; i <= num_output_pixels - 4; i += 4)
+ {
+ vst1q_s32(acc_buffer + i, b);
+ }
+ }
+ else if (output_depth == 2)
+ {
+ int32x4_t b = vdupq_n_s32(bias_data[0]);
+ b = vsetq_lane_s32(bias_data[1], b, 1);
+ b = vsetq_lane_s32(bias_data[1], b, 3);
+ for (; i <= num_output_pixels - 8; i += 8)
+ {
+ vst1q_s32(acc_buffer + 2 * i + 0, b);
+ vst1q_s32(acc_buffer + 2 * i + 4, b);
+ vst1q_s32(acc_buffer + 2 * i + 8, b);
+ vst1q_s32(acc_buffer + 2 * i + 12, b);
+ }
+ for (; i <= num_output_pixels - 2; i += 2)
+ {
+ vst1q_s32(acc_buffer + 2 * i, b);
+ }
+ }
+ else if (output_depth == 4)
+ {
+ const int32x4_t b = vld1q_s32(bias_data);
+ for (; i <= num_output_pixels - 4; i += 4)
+ {
+ vst1q_s32(acc_buffer + 4 * i + 0, b);
+ vst1q_s32(acc_buffer + 4 * i + 4, b);
+ vst1q_s32(acc_buffer + 4 * i + 8, b);
+ vst1q_s32(acc_buffer + 4 * i + 12, b);
+ }
+ for (; i < num_output_pixels; i++)
+ {
+ vst1q_s32(acc_buffer + 4 * i, b);
+ }
+ }
+ else if (output_depth == 8)
+ {
+ const int32x4_t b0 = vld1q_s32(bias_data);
+ const int32x4_t b1 = vld1q_s32(bias_data + 4);
+ for (; i <= num_output_pixels - 2; i += 2)
+ {
+ vst1q_s32(acc_buffer + 8 * i + 0, b0);
+ vst1q_s32(acc_buffer + 8 * i + 4, b1);
+ vst1q_s32(acc_buffer + 8 * i + 8, b0);
+ vst1q_s32(acc_buffer + 8 * i + 12, b1);
+ }
+ for (; i < num_output_pixels; i++)
+ {
+ vst1q_s32(acc_buffer + 8 * i + 0, b0);
+ vst1q_s32(acc_buffer + 8 * i + 4, b1);
+ }
+ }
+ else if (output_depth == 16)
+ {
+ const int32x4_t b0 = vld1q_s32(bias_data);
+ const int32x4_t b1 = vld1q_s32(bias_data + 4);
+ const int32x4_t b2 = vld1q_s32(bias_data + 8);
+ const int32x4_t b3 = vld1q_s32(bias_data + 12);
+ for (; i < num_output_pixels; i++)
+ {
+ vst1q_s32(acc_buffer + 16 * i + 0, b0);
+ vst1q_s32(acc_buffer + 16 * i + 4, b1);
+ vst1q_s32(acc_buffer + 16 * i + 8, b2);
+ vst1q_s32(acc_buffer + 16 * i + 12, b3);
+ }
+ }
+#endif
+ for (; i < num_output_pixels; i++)
+ {
+ memcpy(acc_buffer + i * output_depth, bias_data, sizeof(acc_buffer[0]) * output_depth);
+ }
+}
+
+inline void DepthwiseConvGeneral(const DepthwiseConvParams ¶ms,
+ const int32_t *output_multiplier, const int32_t *output_shift,
+ const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data,
+ const Shape & /* bias_shape */, const int32_t *bias_data,
+ const Shape &output_shape, int8_t *output_data, int thread_start,
+ int thread_end, int thread_dim)
+{
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int depth_multiplier = params.depth_multiplier;
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+ const int32_t input_offset = params.input_offset;
+ const int32_t output_offset = params.output_offset;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = input_shape.Dims(3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_rows = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+
+ static const int kAccBufferMaxSize = 2048;
+ int32_t acc_buffer[kAccBufferMaxSize];
+ assert(kAccBufferMaxSize >= output_depth);
+ const int kOutputPixelsInAccBuffer = kAccBufferMaxSize / output_depth;
+ const int kAccBufferActualSize = kOutputPixelsInAccBuffer * output_depth;
+ UNUSED_RELEASE(kAccBufferActualSize);
+ assert(kOutputPixelsInAccBuffer * output_depth <= kAccBufferActualSize);
+ assert(kAccBufferActualSize <= kAccBufferMaxSize);
+ assert(kOutputPixelsInAccBuffer >= 1);
+ assert(thread_dim == 0 || thread_dim == 1);
+
+ // row_accum_func will point to the core accumulation function to be used
+ // for this DepthwiseConv op.
+ using row_accum_func_t = decltype(&QuantizedDepthwiseConvAccumRowGeneric);
+ row_accum_func_t row_accum_func = nullptr;
+
+#define TFMINI_USE_DEPTHWISECONV_KERNEL(ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER) \
+ if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) && \
+ (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) && \
+ depth_multiplier == FIXED_DEPTH_MULTIPLIER) \
+ { \
+ row_accum_func = \
+ QuantizedDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER>; \
+ }
+
+#ifdef USE_NEON
+ // We go over our list of kernels by decreasing order of preference
+ // for the cases where multiple kernels could apply.
+
+ // Start with the fastest kernels: AllowStrided=false, fixed input depth.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 1, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 1, 4)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 4)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 8, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 8)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 12, 1)
+
+ // Next come the strided kernels: AllowStrided=true, fixed input depth.
+ // They are a bit less efficient, but allow stride!=1.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 16, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 16)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 20)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 32)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 8)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 2, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 4, 1)
+
+ // Finally, the kernels allowing a variable input depth,
+ // these are the least efficient but most general kernels.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 3)
+#endif // USE_NEON
+
+ // No matching fast kernel found, use slow fallback.
+ if (!row_accum_func)
+ {
+ row_accum_func = QuantizedDepthwiseConvAccumRowGeneric;
+ }
+
+#undef TFMINI_USE_DEPTHWISECONV_KERNEL
+
+ const int input_height_stride = input_shape.Dims(3) * input_shape.Dims(2);
+ const int input_batch_stride = input_height_stride * input_shape.Dims(1);
+ const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2);
+
+ // Now that we have determined row_accum_func, we can start work.
+ int batch_start = 0;
+ int batch_end = batches;
+ int row_start = 0;
+ int row_end = output_rows;
+ int output_ptr_offset = 0;
+
+ switch (thread_dim)
+ {
+ case 0:
+ assert(thread_start >= 0);
+ assert(thread_end <= batches);
+ batch_start = thread_start;
+ batch_end = thread_end;
+ output_ptr_offset = batch_start * FlatSizeSkipDim(output_shape, 0);
+ break;
+ case 1:
+ assert(thread_start >= 0);
+ assert(thread_end <= output_rows);
+ row_start = thread_start;
+ row_end = thread_end;
+ output_ptr_offset = row_start * output_width * output_depth;
+ break;
+ }
+
+ int8_t *output_ptr = output_data + output_ptr_offset;
+ int batch_step = (output_rows + row_start - row_end) * output_width * output_depth;
+ for (int b = batch_start; b < batch_end; ++b)
+ {
+ for (int out_y = row_start; out_y < row_end; ++out_y)
+ {
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ const int filter_y_start =
+ std::max(0, (-in_y_origin + dilation_height_factor - 1) / dilation_height_factor);
+ const int filter_y_end =
+ std::min(filter_height, (input_height - in_y_origin + dilation_height_factor - 1) /
+ dilation_height_factor);
+ for (int out_x_buffer_start = 0; out_x_buffer_start < output_width;
+ out_x_buffer_start += kOutputPixelsInAccBuffer)
+ {
+ const int out_x_buffer_end =
+ std::min(output_width, out_x_buffer_start + kOutputPixelsInAccBuffer);
+ // We call a 'pixel' a group of activations that share all but the
+ // 'depth'/'channel' coordinate. num_output_pixels is the number of
+ // output pixels that we will accumulate in this loop iteration.
+ const int num_output_pixels = out_x_buffer_end - out_x_buffer_start;
+ // Initialize our local accumulator with the bias values, so we don't
+ // have to add them later.
+ DepthwiseConvInitAccBuffer(num_output_pixels, output_depth, bias_data, acc_buffer);
+ // Accumulation loop. Most of the time should be spent in here.
+ for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ row_accum_func(stride_width, dilation_width_factor, input_depth, input_width,
+ input_data + in_y * input_height_stride + b * input_batch_stride,
+ input_offset, pad_width, depth_multiplier, filter_width,
+ filter_data + filter_y * filter_height_stride, out_x_buffer_start,
+ out_x_buffer_end, output_depth, acc_buffer);
+ }
+ // Finished accumulating int32_t values. Now need to convert them to
+ // the final 8bit form and store them.
+ const int num_output_values = output_depth * num_output_pixels;
+
+ Quantize(output_multiplier, output_shift, output_depth, num_output_values, output_offset,
+ output_activation_min, output_activation_max, acc_buffer, output_ptr);
+
+ output_ptr += num_output_values;
+ }
+ }
+ output_ptr += batch_step;
+ }
+}
+
+} // namespace depthwise_conv
+
+template <DepthwiseConvOutputRounding kOutputRounding>
+inline void DepthwiseConvWithRounding(const DepthwiseConvParams ¶ms,
+ const int32_t *output_multiplier, const int32_t *output_shift,
+ const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data,
+ const Shape &bias_shape, const int32_t *bias_data,
+ const Shape &output_shape, int8_t *output_data,
+ int thread_start, int thread_end, int thread_dim)
+{
+ const int depth_multiplier = params.depth_multiplier;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ UNUSED_RELEASE(depth_multiplier);
+ UNUSED_RELEASE(dilation_width_factor);
+ UNUSED_RELEASE(dilation_height_factor);
+ assert(dilation_width_factor >= 1);
+ assert(dilation_height_factor >= 1);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_depth = input_shape.Dims(3);
+ UNUSED_RELEASE(output_depth);
+ UNUSED_RELEASE(input_depth);
+ assert(output_depth == input_depth * depth_multiplier);
+ assert(bias_shape.FlatSize() == output_depth);
+
+// TODO Use the code below
+#if 0
+// Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on
+// Jetson TX-2. This compiler does not support the offsetof() macro.
+#if defined(__aarch64__) && !defined(GOOGLE_L4T)
+#if defined(__ANDROID__) && defined(__clang__)
+ CpuFlags cpu_flags;
+ GetCpuFlags(&cpu_flags);
+ const bool has_dot_product_instructions = cpu_flags.neon_dotprod;
+
+ // Dispatch to dot-product 3x3 kernels when supported.
+ if (has_dot_product_instructions)
+ {
+ using optimized_ops::depthwise_conv::DotProduct3x3KernelType;
+ DotProduct3x3KernelType kernel_type = optimized_ops::depthwise_conv::CategorizeDotProductKernel<
+ optimized_ops::depthwise_conv::QuantizationType::kPerChannelInt8>(
+ input_shape, filter_shape, output_shape, params, output_shift);
+ if (kernel_type != DotProduct3x3KernelType::kNone)
+ {
+ DepthwiseConvParams params_copy = params;
+ params_copy.output_shift_per_channel = output_shift;
+ params_copy.output_multiplier_per_channel = output_multiplier;
+ optimized_ops::depthwise_conv::DepthwiseConvDotProduct3x3PerChannel<
+ DepthwiseConvImplementation::kUseNeon3x3DotProduct>(
+ params_copy, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data, thread_start, thread_end, thread_dim);
+ return;
+ }
+ }
+
+#endif
+ // Dispatch to non-dot-product 3x3 kernels when supported.
+
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+
+ // Call kernel optimized for depthwise convolutions using 3x3 filters if
+ // parameters are supported.
+ if (optimized_ops::depthwise_conv::Fast3x3FilterKernelSupported<
+ optimized_ops::depthwise_conv::QuantizationType::kPerChannelInt8>(
+ input_shape, filter_shape, stride_width, stride_height, dilation_width_factor,
+ dilation_height_factor, pad_width, pad_height, depth_multiplier, output_shape, 0,
+ output_shift))
+ {
+ optimized_ops::depthwise_conv::DepthwiseConv3x3FilterPerChannel<
+ DepthwiseConvOutputRounding::kUpward>(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, thread_start, thread_end, thread_dim);
+ return;
+ }
+#endif
+
+#endif /* end of if 0 */
+
+ depthwise_conv::DepthwiseConvGeneral(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, thread_start, thread_end, thread_dim);
+}
+
+inline void DepthwiseConvImpl(const DepthwiseConvParams ¶ms, const int32_t *output_multiplier,
+ const int32_t *output_shift, const Shape &input_shape,
+ const int8_t *input_data, const Shape &filter_shape,
+ const int8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape,
+ int8_t *output_data, int thread_start, int thread_end, int thread_dim)
+{
+ return DepthwiseConvWithRounding<DepthwiseConvOutputRounding::kAwayFromZero>(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, thread_start, thread_end, thread_dim);
+}
+
+template <typename T, typename TS> struct DepthwiseConvWorkerTask : cpu_backend_threadpool::Task
+{
+ DepthwiseConvWorkerTask(const DepthwiseConvParams ¶ms, const int32_t *output_multiplier,
+ const int32_t *output_shift, const Shape &input_shape,
+ const T *input_data, const Shape &filter_shape, const T *filter_data,
+ const Shape &bias_shape, const TS *bias_data, const Shape &output_shape,
+ T *output_data, int thread_start, int thread_end, int thread_dim)
+ : params_(params), output_multiplier_(output_multiplier), output_shift_(output_shift),
+ input_shape_(input_shape), input_data_(input_data), filter_shape_(filter_shape),
+ filter_data_(filter_data), bias_shape_(bias_shape), bias_data_(bias_data),
+ output_shape_(output_shape), output_data_(output_data), thread_start_(thread_start),
+ thread_end_(thread_end), thread_dim_(thread_dim)
+ {
+ }
+
+ void Run() override
+ {
+ DepthwiseConvImpl(params_, output_multiplier_, output_shift_, input_shape_, input_data_,
+ filter_shape_, filter_data_, bias_shape_, bias_data_, output_shape_,
+ output_data_, thread_start_, thread_end_, thread_dim_);
+ }
+
+private:
+ const DepthwiseConvParams ¶ms_;
+ const int32_t *output_multiplier_;
+ const int32_t *output_shift_;
+ const Shape &input_shape_;
+ const T *input_data_;
+ const Shape &filter_shape_;
+ const T *filter_data_;
+ const Shape &bias_shape_;
+ const TS *bias_data_;
+ const Shape &output_shape_;
+ T *output_data_;
+ int thread_start_;
+ int thread_end_;
+ int thread_dim_;
+};
+
+inline int HowManyConvThreads(const Shape &output_shape, const Shape &filter_shape, int thread_dim)
+{
+ constexpr int kMinMulPerThread = 8;
+ const int output_units = output_shape.Dims(thread_dim);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int num_mul_per_unit =
+ FlatSizeSkipDim(output_shape, thread_dim) * filter_height * filter_width;
+ const int min_units_per_thread = kMinMulPerThread / num_mul_per_unit + 1;
+ int thread_count = output_units / min_units_per_thread;
+ return thread_count;
+}
+
+inline void DepthwiseConvPerChannel(const DepthwiseConvParams ¶ms,
+ const int32_t *output_multiplier, const int32_t *output_shift,
+ const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data,
+ const Shape &bias_shape, const int32_t *bias_data,
+ const Shape &output_shape, int8_t *output_data,
+ ruy::Context *ruy_context)
+{
+ UNUSED_ALL(params, output_multiplier, output_shift, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape, output_data, ruy_context);
+
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const int output_batches = output_shape.Dims(0);
+ const int output_rows = output_shape.Dims(1);
+ int thread_count_batch = HowManyConvThreads(output_shape, filter_shape, 0);
+ int thread_count_row = HowManyConvThreads(output_shape, filter_shape, 1);
+ int thread_dim, thread_count, thread_dim_size;
+ if (thread_count_batch > thread_count_row)
+ {
+ thread_dim = 0;
+ thread_dim_size = output_batches;
+ thread_count = thread_count_batch;
+ }
+ else
+ {
+ thread_dim = 1;
+ thread_dim_size = output_rows;
+ thread_count = thread_count_row;
+ }
+
+ // NOTE Borrow RuyContext to get max_num_threads setting
+ // TODO Define and use max_num_threads for CPU backend
+ const int max_threads = ruy_context->max_num_threads();
+ thread_count = std::max(1, std::min(thread_count, max_threads));
+
+ if (thread_count == 1)
+ {
+ DepthwiseConvImpl(params, output_multiplier, output_shift, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data,
+ /*thread_start=*/0,
+ /*thread_end=*/output_rows, /*thread_dim=*/1);
+ }
+ else
+ {
+ std::vector<DepthwiseConvWorkerTask<int8_t, int32_t>> tasks;
+ // TODO(b/131746020) don't create new heap allocations every time.
+ // At least we make it a single heap allocation by using reserve().
+ tasks.reserve(thread_count);
+ int thread_start = 0;
+ for (int i = 0; i < thread_count; ++i)
+ {
+ int thread_end = thread_start + (thread_dim_size - thread_start) / (thread_count - i);
+ tasks.emplace_back(params, output_multiplier, output_shift, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data, output_shape,
+ output_data, thread_start, thread_end, thread_dim);
+ thread_start = thread_end;
+ }
+ cpu_backend_threadpool::Execute(tasks.size(), tasks.data(), ruy_context);
+ }
+}
+
+} // namespace optimized_integer_ops
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_INT8_H__
}
template <typename T>
-inline void BroadcastBinaryArithmeticOpSlowQuant8(
+inline typename std::enable_if_t<is_quant8<T>::value> BroadcastBinaryArithmeticOpSlow(
const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const T *input1_data,
const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data,
const std::function<T(const BinaryArithmeticOpParam ¶ms, const T &, const T &)> &fn)
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
- if ((params.quantized_activation_min < 0) && (params.quantized_activation_max > 255))
- {
- throw std::runtime_error{"Support only for Quant8."};
- }
-
// Comment from tensorflow lite:
//
// In Tensorflow, the dimensions are canonically named (batch_number, row,
{
for (int c = 0; c < extended_output_shape.Dims(3); ++c)
{
- output_data[Offset(extended_output_shape, b, y, x, c)] =
- ActivationFunctionWithMinMax<uint8_t>(
- fn(params, input1_data[SubscriptToIndex(desc1, b, y, x, c)],
- input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
- params.quantized_activation_min, params.quantized_activation_max);
+ output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax<T>(
+ fn(params, input1_data[SubscriptToIndex(desc1, b, y, x, c)],
+ input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
+ params.quantized_activation_min, params.quantized_activation_max);
}
}
}
}
}
+inline void Conv(const ConvParams ¶ms, const int32_t *output_multiplier,
+ const int32_t *output_shift, const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape, int8_t *output_data)
+{
+ UNUSED_RELEASE(bias_shape);
+ // Get parameters.
+ const int32_t input_offset = params.input_offset; // r = s(q - Z)
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int32_t output_offset = params.output_offset;
+
+ // Set min and max value of the output.
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+
+ // Consistency check.
+ assert(output_activation_min < output_activation_max);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+ const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ // Check dimensions of the tensors.
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ for (int out_channel = 0; out_channel < output_depth; ++out_channel)
+ {
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+
+ // Zero padding by omitting the areas outside the image.
+ const bool is_point_inside_image =
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+
+ if (!is_point_inside_image)
+ {
+ continue;
+ }
+
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ int32_t input_val = input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
+ int32_t filter_val =
+ filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)];
+ // Accumulate with a 32-bit accumulator.
+ // In the nudging process during model quantization, we force
+ // the real value of 0.0 to be represented by a quantized value. This
+ // guarantees that the input_offset is an int8_t, even though
+ // it is represented using int32_t. int32_t += int8_t *
+ // (int8_t - int8_t) so the highest value we can get from each
+ // accumulation is [-127, 127] * ([-128, 127] -
+ // [-128, 127]), which is [-32512, 32512]. log2(32512)
+ // = 14.98, which means we can accumulate at least 2^16
+ // multiplications without overflow. The accumulator is
+ // applied to a filter so the accumulation logic will hold as
+ // long as the filter size (filter_y * filter_x * in_channel)
+ // does not exceed 2^16, which is the case in all the models
+ // we have seen so far.
+ // TODO(jianlijianli): Add a check to make sure the
+ // accumulator depth is smaller than 2^16.
+ acc += filter_val * (input_val + input_offset);
+ }
+ }
+ }
+
+ if (bias_data)
+ {
+ acc += bias_data[out_channel];
+ }
+ acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_channel],
+ output_shift[out_channel]);
+ acc += output_offset;
+ acc = std::max(acc, output_activation_min);
+ acc = std::min(acc, output_activation_max);
+ output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
+ static_cast<int8_t>(acc);
+ }
+ }
+ }
+ }
+}
+
} // namespace reference
} // namespace cker
} // namespace nnfw
author = 'Samsung Research & contributors'
# The full version, including alpha/beta/rc tags
-release = '1.12.0'
+release = '1.15.0'
# -- General configuration ---------------------------------------------------
http://download.tizen.org/releases/daily/tizen/unified/latest/repos/standard/packages/aarch64/
```
-$ wget http://download.tizen.org/releases/daily/tizen/unified/latest/repos/standard/packages/aarch64/libarmcl-v20.05-17.5.aarch64.rpm
+$ wget http://download.tizen.org/releases/daily/tizen/unified/latest/repos/standard/packages/aarch64/libarmcl-v21.02-17.5.aarch64.rpm
$ wget http://download.tizen.org/releases/daily/tizen/unified/latest/repos/standard/packages/aarch64/libhdf5-101-1.10.1-3.85.aarch64.rpm
(4) Copy to device
```
-$ sdb push libarmcl-v20.05-17.5.aarch64.rpm /opt/usr/home/owner/share/tmp/
+$ sdb push libarmcl-v21.02-17.5.aarch64.rpm /opt/usr/home/owner/share/tmp/
$ sdb push libhdf5-101-1.10.1-3.85.aarch64.rpm /opt/usr/home/owner/share/tmp/
$ sdb push libhdf5_cpp101-1.10.1-3.85.aarch64.rpm /opt/usr/home/owner/share/tmp/
```
```
sh-3.2# cd /opt/usr/home/owner/share/tmp/
-sh-3.2# rpm -i libarmcl-v20.05-17.5.aarch64.rpm
+sh-3.2# rpm -i libarmcl-v21.02-17.5.aarch64.rpm
sh-3.2# rpm -i libhdf5-101-1.10.1-3.85.aarch64.rpm
sh-3.2# rpm -i libhdf5_cpp101-1.10.1-3.85.aarch64.rpm
```
├── bin
│  ├── nnapi_test
│  ├── nnpackage_run
-│  ├── tflite_loader_test_tool
+│  ├── tflite_comparator
│  └── tflite_run
├── include
│  ├── nnfw
$ sudo ./tools/cross/install_rootfs.sh aarch64
```
- supports `arm`(default) and `aarch64` architecture for now
-- supports `xenial`(default), `trusty` and `bionic` release
+- supports `bionic`(default) and `focal` release
To see the options,
```
***\* CAUTION: The OS version of rootfs must match the OS version of execution target device. On the other hand, you need to match the Ubuntu version of the development PC with the Ubuntu version of rootfs to be used for cross-build. Otherwise, unexpected build errors may occur.***
-If you are using Ubuntu 16.04 LTS, select `xenial`, if you are using Ubuntu 18.04 LTS, select `bionic`. You can check your Ubuntu code name in the following way.
+If you are using Ubuntu 18.04 LTS, select `bionic`, if you are using Ubuntu 20.04 LTS, select `focal`. You can check your Ubuntu code name in the following way.
```
$ cat /etc/lsb-release
Use `ROOTFS_DIR` to a full path to prepare at alternative path.
```
-$ ROOTFS_DIR=/home/user/rootfs/aarch64-xenial sudo -E ./tools/cross/install_rootfs.sh aarch64
+$ ROOTFS_DIR=/home/user/rootfs/aarch64-bionic sudo -E ./tools/cross/install_rootfs.sh aarch64
```
### Using proxy
$ sudo ./tools/cross/install_rootfs.sh arm
```
- supports `arm`(default) and `aarch64` architecture for now
-- supports `bionic`(default), `trusty`, `xenial` and `focal` release
+- supports `bionic`(default) and `focal` release
To see the options,
```
***\* CAUTION: The OS version of rootfs must match the OS version of execution target device. On the other hand, you need to match the Ubuntu version of the development PC with the Ubuntu version of rootfs to be used for cross-build. Otherwise, unexpected build errors may occur.***
-If you are using Ubuntu 16.04 LTS, select `xenial`, if you are using Ubuntu 18.04 LTS, select `bionic`. You can check your Ubuntu code name in the following way.
+If you are using Ubuntu 18.04 LTS, select `bionic`, if you are using Ubuntu 20.04 LTS, select `focal`. You can check your Ubuntu code name in the following way.
```
$ cat /etc/lsb-release
Use `ROOTFS_DIR` to a full path to prepare at alternative path.
```
-$ ROOTFS_DIR=/home/user/rootfs/arm-xenial sudo -E ./tools/cross/install_rootfs.sh arm
+$ ROOTFS_DIR=/home/user/rootfs/arm-bionic sudo -E ./tools/cross/install_rootfs.sh arm
```
### Using proxy
--slave /usr/bin/arm-linux-gnueabihf-gcov arm-linux-gnueabihf-gcov /usr/bin/arm-linux-gnueabihf-gcov-8
```
-### Ubuntu 16.04 LTS
+### Ubuntu 20.04 LTS
-On Ubuntu 16.04 or older, follow the next steps:
+Same as Ubuntu 18.04 LTS, except for the g++ version.
-```
-$ cd ~/your/path
-$ wget https://releases.linaro.org/components/toolchain/binaries/7.2-2017.11/arm-linux-gnueabihf/gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf.tar.xz
-$ tar xvf gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf.tar.xz
-$ echo 'export PATH=~/your/path/gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf/bin:$PATH' >> ~/.bashrc
-```
+## Build and install ARM Compute Library
-Make sure you get `libstdc++.so` updated on your target with your new toolchain's corresponding one.
+In most cases you only need to build ACL (ARM Compute Library) once.
-For example, if you installed gcc-linaro-7.2.1-2017.11 above, do
+To build ACL, you need to install scons:
```
-$ wget https://releases.linaro.org/components/toolchain/binaries/7.2-2017.11/arm-linux-gnueabihf/runtime-gcc-linaro-7.2.1-2017.11-arm-linux-gnueabihf.tar.xz
-$ tar xvf runtime-gcc-linaro-7.2.1-2017.11-arm-linux-gnueabihf.tar.xz
+$ sudo apt-get install scons
```
-Then, copy `libstdc++.so.6.0.24` into `/usr/lib/arm-linux-gnueabihf`, and update symbolic links on your device.
-
-## Build and install ARM Compute Library
-
-Mostly you only need once of ACL build.
-
ACL will be automatically installed in `externals/acl` when you build the runtime without any changes.
You can check the ACL source information in `infra/cmake/packages/ARMComputeSourceConfig.cmake`; an excerpt is shown below.
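For reference, the lines below are taken from the CMake change included in this patch, which pins ACL to v21.02. They are only an excerpt of `ARMComputeSourceConfig.cmake`, not the whole file.
```
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v21.02.tar.gz)
ExternalSource_Download(ARMCOMPUTE ${ARMCOMPUTE_URL})
set(ARMComputeSource_DIR ${ARMCOMPUTE_SOURCE_DIR} PARENT_SCOPE)
```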
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Wed Jan 14 16:48:12 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.0
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.13.0.md
--- /dev/null
+# Release Note 1.13.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- Add optimization passes: ConvertNCHWToNHWC, FoldSparseToDensePass, FuseBatchNormWithConvPass, ForwardReshapeToUnaryOpPass, RemoveUnnecessarySlicePass, RemoveUnnecessarySplitPass, RemoveUnnecessaryReshapePass, RemoveRedundantReshape, SubstituteTransposeToReshapePass, SubstituteSqueezeToReshapePass
+- Support more operators: FAKE_QUANT
+- Enhancements: Support auto generated random input for record-minmax (for better quantization testing)
+- Changes: the `--all` option is renamed to `--O1` in circle2circle (and one-optimize)
+- Fixes: `tf2tfliteV2` accepts input shapes with the `--v2` option; lots of fixes to increase test coverage
+- Experimental: Compile ONNX models to circle
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu Mar 18 16:47:12 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.0
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.14.0.md
--- /dev/null
+# Release Note 1.14.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- `one-codegen` interface now distinguishes own arguments from backend's.
+- Adds `RemoveUnnecessaryStridedSlice` optimization pass.
+- Introduces experimental support for generating profile data.
+ - Adds `--generate_profile_data` option to `one-optimize`, `one-quantize`.
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu Mar 18 16:47:12 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.0
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.15.0.md
--- /dev/null
+# Release Note 1.15.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- Support more Ops for quantization
+- Fix `record-minmax` tool for bool type, NaN values
+- Fix `one-cmds` test scripts
+- Remove `stdex` module
+- `arser` supports short option
+
+
+## ONE Runtime
+
+### Runtime backend supports more operations and types
+
+- CPU backend
+ - Add: int8
+ - AvgPool2d: int8
+ - Conv2D: int8
+ - DepthwiseConv2D: int8
+ - Div: uint8
+ - Elu: float
+ - ExpandDims: int8
+ - LogicalAnd: boolean
+ - Maximum: uint8
+ - MaxPool2D: int8
+ - Minimum: uint8
+ - Mul: int8
+ - Pad: int8
+ - PadV2: int8
+ - Quantize: uint8, int8
+ - Reshape: int8
+ - ResizeBilinear: int8
+ - Softmax: int8
+ - Squeeze: int8
+ - Sub: int8
+
+### ARM Compute Library Update
+
+- ONERT uses Compute Library v21.02
./1.10/index
./1.11/index
./1.12/index
+ ./1.13/index
The major classes are described below. One must implement these classes (and some more) to create a backend.
- `Backend` : Responsible to create a backend context which is a set of backend components
-- `IConfig` : Configurations and miscellaneous stuff
+- `BackendContext` : Holds data for the current session and is also responsible for creating tensor objects and kernels
+ - `BackendContext::genTensors` : Create tensor objects
+ - `BackendContext::genKernels` : Create kernels
+- `IConfig` : Configurations and miscellaneous stuff (not session based, global)
- `ITensorRegistry` : A set of tensor(`ITensor`) objects that are used by the current backend
-- `ITensorBuilder` : Make tensor object and register it to `ITensorRegistry` and static tensors
-- `IKernelGenerator` : Generates operation kernels
Please refer to each class document for details. You may refer to [Bundle Backends](#bundle-backends) for actual implementation samples.
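For orientation, the sketch below shows roughly how these classes relate to one another. It is an illustrative stand-in, not the actual onert API: the real interfaces take graph/session arguments and return richer types, so treat the `My*` names and all signatures as hypothetical and use the bundle backends as the authoritative reference.
```
#include <memory>

// Illustrative sketch only: the stand-in base classes below exist just to show
// how the classes listed above relate to each other.
struct ITensorRegistry
{
  // set of ITensor objects used by this backend
};

struct IConfig
{
  // global (not session-based) configuration
};

struct BackendContext
{
  virtual ~BackendContext() = default;
  virtual void genTensors() = 0; // create tensor objects and register them
  virtual void genKernels() = 0; // create kernels for the assigned operations
};

struct Backend
{
  virtual ~Backend() = default;
  virtual std::shared_ptr<IConfig> config() const = 0;
  // hypothetical, simplified signature; the real one receives session data
  virtual std::unique_ptr<BackendContext> newContext() const = 0;
};

// A hypothetical backend: its context owns a tensor registry and builds kernels
// that read from / write to the registered tensors.
struct MyBackendContext : BackendContext
{
  void genTensors() override { /* create tensors and register them to _registry */ }
  void genKernels() override { /* build kernels using tensors from _registry */ }
  std::shared_ptr<ITensorRegistry> _registry = std::make_shared<ITensorRegistry>();
};

struct MyBackend : Backend
{
  std::shared_ptr<IConfig> config() const override { return std::make_shared<IConfig>(); }
  std::unique_ptr<BackendContext> newContext() const override
  {
    return std::make_unique<MyBackendContext>();
  }
};
```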
## Provided Backend Implementations
-We provide some backends along with the runtime. There is the special backend `controlflow` which is part of runtime core, and some bundle backends which are baseline backends and samples of backend implementation.
+We provide some backends along with the runtime. There is the special backend `builtin`, which is part of the runtime core, and some bundle backends, which are baseline backends and samples of backend implementation.
-## `controlflow` Backend
+## `builtin` Backend
-`controlflow` is a special backend that is always loaded(statically linked, part of runtime core). It is implemented just like other backends, but there are some things that it does exclusively.
+`builtin` is a special backend that is always loaded (statically linked, part of the runtime core). It is implemented just like other backends, but there are some things that it does exclusively.
- Has kernels for If, While and Permute operations (kernels from other backends are never used)
-- The runtime core directly creates `controlflow`'s tensor objects to accept user-given input and output buffers
-- The runtime core gives the executor context to `controlflow` backend which allows control flow ops can change execution flow properly
+- The runtime core directly creates `builtin`'s tensor objects to accept user-given input and output buffers
+- The runtime core gives the executor context to the `builtin` backend, which allows control flow ops to change the execution flow properly
## Bundle Backends
# Supported Operations and backend
-As of 2020-12-07
+As of 2021-03-08
### Raw-data format (float32, int32, boolean, etc)
AvgPool2D | O | O | O
BatchMatmul | O | |
BatchToSpaceND | O | O | O
+BroadcastTo | O | |
Cast | O | O | O
Concat | O | O | O
Conv2D | O | O | O
DepthToSpace | O | O | O
DepthwiseConv2D | O | O | O
Div | O | O | O
+Einsum | O | |
+Elu | O | |
EmbeddingLookup | | O | O
Equal | O | O | O
Exp | O | O | O
-ExpandDims | O | |
+ExpandDims | O | O | O
Fill | O | |
Floor | O | O | O
FullyConnected | O | O | O
+FusedBatchNorm | O | |
Gather | O | O | O
Greater | O | O | O
GreaterEqual | O | O | O
LessEqual | O | O | O
LocalResponseNormalize | | O | O
Log | O | |
-LogicalAnd | | O | O
+LogicalAnd | O | O | O
LogicalNot | O | O | O
LogicalOr | O | O | O
Logistic | O | O | O
LogSoftmax | O | |
-LSHProjection | | |
LSTM | | O | O
+MatrixBandPart | O | |
Maximum | O | O | O
MaxPool2D | O | O | O
Mean | O | O | O
Quantize | O | |
Range | O | |
Rank | O | |
+ReduceAny(All) | O | |
ReduceAny(Any) | O | |
ReduceMax(Max) | O | O | O
ReduceMin(Min) | O | O | O
ReduceProd | O | |
ReduceSum(Sum) | O | O | O
ReLU | O | O | O
-ReLU6 | | O | O
+ReLU6 | O | O | O
Reshape | O | O | O
ResizeBilinear | O | O | O
-ReverseV2 | O | | O
+ResizeNearestNeighbor | | O | O
+ReverseV2 | O | O | O
RNN | | O | O
Round | O | |
Rsqrt | O | O | O
SpaceToBatchND | O | O | O
SpaceToDepth | O | O | O
Split | O | O | O
-SplitV | O | |
+SplitV | O | O |
Sqrt | O | O | O
Square | O | |
SquaredDifference | O | O | O
Squeeze | O | O | O
StridedSlice | O | O | O
Sub | O | O | O
-Svdf | | |
Tanh | O | O | O
Tile | O | |
TopKV2 | | | O
DepthToSpace | O | O | O
DepthwiseConv2D | O | O | O
Dequantize | O | O | O
+Div | O | |
EmbeddingLookup | | O | O
Equal | O | O | O
-ExpandDims | O | |
+Erf | O | |
+ExpandDims | O | O | O
FullyConnected | O | O | O
Gather | O | O | O
Greater | O | O | O
LessEqual | O | O | O
Logistic | O | O | O
LogSoftmax | O | |
-Maximum | | O | O
+Maximum | O | O | O
MaxPool2D | O | O | O
Mean | O | O | O
-Minimum | | O | O
+Minimum | O | O | O
Mul | O | O |
NotEqual | O | O | O
-OneHot | | O |
Pack | | O | O
Pad | O | O | O
PadV2 | O | O | O
PReLU | | O | O
+Quantize | O | |
Rank | O | |
ReduceMax(Max) | | O |
ReduceMin(Min) | | O |
ReLU | | O | O
ReLU6 | | O | O
Reshape | O | O | O
-ResizeBilinear | O | | O
+ResizeBilinear | O | O | O
+ResizeNearestNeighbor | | O | O
Shape | O | |
Slice | O | O | O
Softmax | O | O | O
SpaceToBatchND | O | O | O
SpaceToDepth | O | O | O
Split | O | O | O
-SplitV | O | |
+SplitV | O | O |
Squeeze | O | O | O
+StatelessRandomUniform | O | |
StridedSlice | | O | O
Sub | O | O | O
Tanh | O | O | O
Operation | CPU | ACL-CL | ACL-NEON
-- | -- | -- | --
+Add | O | O | O
ArgMax | O | O | O
ArgMin | O | O | O
-Concat | O | |
+AvgPool2D | O | |
+Concat | O | O | O
+Conv2D | O | |
DepthToSpace | O | |
-Dequantize | O | |
+DepthwiseConv2D | O | |
+Dequantize | O | O | O
+ExpandDims | O | O | O
+MaxPool2D | O | |
+Mul | O | O | O
+Pad | O | O | O
+PadV2 | O | |
+PReLU | | O | O
+Quantize | O | |
Rank | O | |
+Reshape | O | O | O
+ResizeBilinear | O | O | O
+ResizeNearestNeighbor | | O | O
Shape | O | |
+Softmax | O | O | O
+Squeeze | O | O | O
+Sub | O | O | O
set(HOST_ARCH_BASE "arm")
elseif("${HOST_ARCH}" STREQUAL "aarch64")
set(HOST_ARCH_BASE "aarch64")
+elseif("${HOST_ARCH}" STREQUAL "i686")
+ set(HOST_ARCH_BASE "i686")
else()
message(FATAL_ERROR "'${HOST_ARCH}' architecture is not supported")
endif()
set(TARGET_ARCH_BASE "arm")
elseif("${TARGET_ARCH}" STREQUAL "aarch64")
set(TARGET_ARCH_BASE "aarch64")
+elseif("${TARGET_ARCH}" STREQUAL "i686")
+ set(TARGET_ARCH_BASE "i686")
else()
message(FATAL_ERROR "'${TARGET_ARCH}' architecture is not supported")
endif()
nnas_include(OptionTools)
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v20.05.tar.gz)
+ set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v21.02.tar.gz)
ExternalSource_Download(ARMCOMPUTE ${ARMCOMPUTE_URL})
set(ARMComputeSource_DIR ${ARMCOMPUTE_SOURCE_DIR} PARENT_SCOPE)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- # NOTE TensorFlow 1.12 downloads abseil from the following URL
- # - https://github.com/abseil/abseil-cpp/archive/48cd2c3f351ff188bc85684b84a91b6e6d17d896.tar.gz
- #
- # The last change of "48cd2c3f351" was commited on 2018.09.27
- #
- # Let's use the latest released version (2020-02 release patch 2)
+ # NOTE TensorFlow 2.3 downloads abseil from the following URL
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- envoption(ABSEIL_URL ${EXTERNAL_DOWNLOAD_SERVER}/abseil/abseil-cpp/archive/20200225.2.tar.gz)
+ envoption(ABSEIL_URL ${EXTERNAL_DOWNLOAD_SERVER}/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz)
ExternalSource_Download(ABSEIL
DIRNAME ABSEIL
URL ${ABSEIL_URL}
- CHECKSUM MD5=73f2b6e72f1599a9139170c29482ddc4)
+ CHECKSUM MD5=4d9aa7e757adf48fef171c85f0d88552)
set(AbseilSource_DIR ${ABSEIL_SOURCE_DIR} PARENT_SCOPE)
set(AbseilSource_FOUND TRUE PARENT_SCOPE)
--- /dev/null
+function(_OouraFFTSource_import)
+ if(NOT DOWNLOAD_OOURAFFT)
+ set(OouraFFTSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_OOURAFFT)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # NOTE TensorFlow 2.3 downloads OOURAFFT from the following URL
+ envoption(OOURAFFT_URL https://github.com/petewarden/OouraFFT/archive/v1.0.tar.gz)
+
+ ExternalSource_Download(OOURAFFT ${OOURAFFT_URL})
+
+ set(OouraFFTSource_DIR ${OOURAFFT_SOURCE_DIR} PARENT_SCOPE)
+ set(OouraFFTSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_OouraFFTSource_import)
+
+_OouraFFTSource_import()
DOCKER_BUILD_ARGS=()
# Default setting
-UBUNTU_CODENAME="xenial"
+UBUNTU_CODENAME="bionic"
DOCKER_TAG="latest"
while [[ $# -gt 0 ]]
return
fi
- CLANG_FORMAT_CANDIDATES+=("clang-format-3.9")
+ CLANG_FORMAT_CANDIDATES+=("clang-format-8")
for CLANG_FORMAT_CANDIDATE in ${CLANG_FORMAT_CANDIDATES[@]}; do
if command_exists ${CLANG_FORMAT_CANDIDATE} ; then
CLANG_FORMAT="${CLANG_FORMAT_CANDIDATE}"
done
if [[ -z ${CLANG_FORMAT} ]]; then
- echo "[ERROR] clang-format-3.9 is unavailable"
- echo
- echo " Please install clang-format-3.9 before running format check"
- exit 1
- fi
-
- # Migration to clang-format-8
- # TODO Remove this after migration to clang-format-8
- CLANG_FORMAT_8="clang-format-8"
- if ! command_exists $CLANG_FORMAT_8_CANDIDATE; then
echo "[ERROR] clang-format-8 is unavailable"
echo
echo " Please install clang-format-8 before running format check"
- echo " (or use latest docker image if you are using docker for format check)"
exit 1
fi
- for DIR_CLANG_FORMAT_8 in $(git ls-files -co --exclude-standard '*/.clang-format'); do
- DIRECTORIES_USE_CLANG_FORMAT_8+=($(dirname "${DIR_CLANG_FORMAT_8}"))
- done
# Check c++ files
FILES_TO_CHECK_CPP=()
- FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8=()
for f in ${FILES_TO_CHECK[@]}; do
# Manually ignore style checking
if [[ ${f} == +(*/NeuralNetworks.h|*/NeuralNetworksExtensions.h) ]]; then
# File extension to check
if [[ ${f} == +(*.h|*.hpp|*.cpp|*.cc|*.c|*.cl) ]]; then
-
- # Check clang-format-8 target files first
- # TODO Remove this after migration to clang-format-8
- FOUND_CLANG_8=0
- for USE_CLANG_FORMAT_8 in ${DIRECTORIES_USE_CLANG_FORMAT_8[@]}; do
- if [[ $f = $USE_CLANG_FORMAT_8* ]]; then
- FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8+=("$f")
- FOUND_CLANG_8=1
- break
- fi
- done
-
- if [[ $FOUND_CLANG_8 -ne 1 ]]; then
- FILES_TO_CHECK_CPP+=("${f}")
- fi
+ FILES_TO_CHECK_CPP+=("${f}")
fi
done
INVALID_EXIT=${EXIT_CODE}
fi
fi
-
- # Check by clang-format-8
- # TODO Remove this after migration to clang-format-8
- if [[ ${#FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8} -ne 0 ]]; then
- ${CLANG_FORMAT_8} -i ${FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8[@]}
- EXIT_CODE=$?
- if [[ ${EXIT_CODE} -ne 0 ]]; then
- INVALID_EXIT=${EXIT_CODE}
- fi
- fi
}
function check_python_files() {
"${CANDIDATES[@]}"
# Exclude *.test.cpp files from coverage report
-"${LCOV_PATH}" -r "${EXTRACTED_COVERAGE_INFO_PATH}" -o "${EXCLUDED_COVERAGE_INFO_PATH}" \
- '*.test.cpp'
-
# Exclude flatbuffer generated files from coverage report
"${LCOV_PATH}" -r "${EXTRACTED_COVERAGE_INFO_PATH}" -o "${EXCLUDED_COVERAGE_INFO_PATH}" \
- '*_schema_generated.h'
+ '*.test.cpp' '*_schema_generated.h'
# Final coverage data
cp -v ${EXCLUDED_COVERAGE_INFO_PATH} ${COVERAGE_INFO_PATH}
+++ /dev/null
-FROM ubuntu:16.04
-
-ARG UBUNTU_MIRROR
-
-RUN if [ -n "$http_proxy" ] ; then echo "Acquire::http::proxy \"${http_proxy}\";" >> /etc/apt/apt.conf ; fi
-RUN if [ -n "$https_proxy" ] ; then echo "Acquire::https::proxy \"${https_proxy}\";" >> /etc/apt/apt.conf ; fi
-RUN if [ -n "$UBUNTU_MIRROR" ] ; then sed "s/archive.ubuntu.com/${UBUNTU_MIRROR}/g" -i /etc/apt/sources.list ; fi
-
-# Install 'add-apt-repository'
-RUN apt-get update && apt-get -qqy install software-properties-common
-
-# Build tool
-RUN apt-get update && apt-get -qqy install build-essential cmake scons git lcov
-
-# Install extra dependencies (Caffe, nnkit)
-RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoogle-glog-dev libatlas-base-dev libhdf5-dev
-
-# Install protocol buffer
-RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
-
-# Additonal tools
-RUN apt-get update && \
- apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
-RUN pip3 install --upgrade pip
-RUN pip3 install yapf==0.22.0 numpy
-
-# Install google test (source)
-RUN apt-get update && apt-get -qqy install libgtest-dev
-
-###
-### NOTE: Don't add new package install using apt-get or pip below this line
-###
-
-# Install native build tool gcc version 6.x
-RUN add-apt-repository ppa:ubuntu-toolchain-r/test && apt-get update && apt-get -qqy install gcc-6 g++-6
-RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-6 60 --slave /usr/bin/g++ g++ /usr/bin/g++-6 && update-alternatives --config gcc
-
-# Install cross build tool gcc version 6.x
-RUN wget https://releases.linaro.org/components/toolchain/binaries/6.3-2017.02/arm-linux-gnueabihf/gcc-linaro-6.3.1-2017.02-x86_64_arm-linux-gnueabihf.tar.xz -O gcc-hardfp.tar.xz -nv
-RUN wget https://releases.linaro.org/components/toolchain/binaries/6.2-2016.11/arm-linux-gnueabi/gcc-linaro-6.2.1-2016.11-x86_64_arm-linux-gnueabi.tar.xz -O gcc-softfp.tar.xz -nv
-RUN wget https://releases.linaro.org/components/toolchain/binaries/6.2-2016.11/aarch64-linux-gnu/gcc-linaro-6.2.1-2016.11-x86_64_aarch64-linux-gnu.tar.xz -O gcc-aarch64.tar.xz -nv
-RUN tar -xf gcc-hardfp.tar.xz -C /opt/ && rm -rf gcc-hardfp.tar.xz
-RUN tar -xf gcc-softfp.tar.xz -C /opt/ && rm -rf gcc-softfp.tar.xz
-RUN tar -xf gcc-aarch64.tar.xz -C /opt/ && rm -rf gcc-aarch64.tar.xz
-ENV PATH "/opt/gcc-linaro-6.2.1-2016.11-x86_64_arm-linux-gnueabi/bin:/opt/gcc-linaro-6.3.1-2017.02-x86_64_arm-linux-gnueabihf/bin:/opt/gcc-linaro-6.2.1-2016.11-x86_64_aarch64-linux-gnu/bin:$PATH"
-
-###
-### NOTE: Don't add build & install process using installed buildtool above this line
-###
-
-# Build and install google test static libraries
-WORKDIR /root/gtest
-RUN cmake /usr/src/gtest
-RUN make
-RUN mv *.a /usr/lib
-WORKDIR /root
-RUN rm -rf gtest
-
-# Install gbs & sdb
-RUN echo 'deb [trusted=yes] http://download.tizen.org/tools/latest-release/Ubuntu_16.04/ /' | cat >> /etc/apt/sources.list
-RUN apt-get update && apt-get -qqy install gbs
-RUN wget http://download.tizen.org/sdk/tizenstudio/official/binary/sdb_3.1.4_ubuntu-64.zip -O sdb.zip
-RUN unzip -d tmp sdb.zip && rm sdb.zip
-RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp
-
-# Clean archives (to reduce image size)
-RUN apt-get clean -y
exit 255
fi
-BUILD_ITEMS="angkor cwrap pepper-str pepper-strcast pp stdex \
+BUILD_ITEMS="angkor cwrap pepper-str pepper-strcast pp \
oops pepper-assert \
hermes hermes-std \
loco locop locomotiv logo-core logo \
-foder souschef arser vconone \
+foder souschef arser vconone crew \
safemain mio-circle mio-tflite \
tflite2circle \
luci \
luci-interpreter \
+luci-eval-driver \
+luci-pass-value-test \
luci-value-test \
record-minmax \
circle2circle circle-quantizer"
option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
option(BUILD_TFLITE_LOADER "Build TensorFlow Lite loader" ON)
option(BUILD_CIRCLE_LOADER "Build circle loader" ON)
-option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" ON)
+option(BUILD_TFLITE_COMPARATOR_TEST_TOOL "Build tflite loader testing tool" ON)
option(BUILD_WITH_HDF5 "Build test tool with HDF5 library" ON)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" ON)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" ON)
option(DOWNLOAD_BOOST "Download boost source" OFF)
option(DOWNLOAD_RUY "Download ruy source" ON)
option(DOWNLOAD_CPUINFO "Download cpuinfo source" ON)
+option(DOWNLOAD_OOURAFFT "Download Ooura FFT source" ON)
option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" ON)
option(BUILD_BOOST "Build boost source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" ON)
--- /dev/null
+#
+# i686 tizen compile options
+#
+
+message(STATUS "Building for i686 Tizen")
+
+# Build flag for tizen
+set(CMAKE_C_FLAGS_DEBUG "-O -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O -g -DDEBUG")
+
+# TODO : add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# addition for i686-tizen
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ )
--- /dev/null
+#
+# i686 tizen cmake options
+#
+option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
+
+option(BUILD_LOGGING "Build logging runtime" OFF)
+option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
+option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(BUILD_XNNPACK "Build XNNPACK" OFF)
# Let's build and install ARMCompute libraries
function(_ARMCompute_Build ARMComputeInstall_DIR)
set(PKG_NAME "ARMCOMPUTE")
- set(PKG_IDENTIFIER "20.05")
+ set(PKG_IDENTIFIER "21.02")
set(INSTALL_STAMP_PATH "${ARMComputeInstall_DIR}/${PKG_NAME}.stamp")
set(ARMComputeBuild_DIR "${CMAKE_BINARY_DIR}/externals/armcompute")
endmacro(return_unless)
# Required packages
- nnas_find_package(AbseilSource QUIET)
- return_unless(AbseilSource_FOUND)
+ nnas_find_package(Abseil QUIET)
+ return_unless(Abseil_FOUND)
nnfw_find_package(TensorFlowEigen EXACT 1.13.1 QUIET)
return_unless(TensorFlowEigen_1_13_1_FOUND)
nnas_find_package(FarmhashSource QUIET)
+++ /dev/null
-# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
-#
-# Tensorflow Lite library 2.3.0
-#
-set(TENSORFLOW_LITE_BASE ${TFLiteVanillaTensorFlowSource_DIR}/tensorflow/lite)
-
-file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c"
- "${TENSORFLOW_LITE_BASE}/*.cc"
- "${TENSORFLOW_LITE_BASE}/core/*.cc")
-
-file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
-
-file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
-
-file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c"
- "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
-
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc")
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc")
-
-file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc")
-
-file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc")
-
-list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_EXPERIMENTAL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS})
-
-# externals
-list(APPEND TFLITE_SRCS "${TFLiteVanillaFarmhashSource_DIR}/src/farmhash.cc")
-list(APPEND TFLITE_SRCS "${TFLiteVanillaFFT2DSource_DIR}/fftsg.c")
-list(APPEND TFLITE_SRCS "${TFLiteVanillaFFT2DSource_DIR}/fftsg2d.c")
-list(APPEND TFLITE_SRCS "${TFLiteVanillaFlatBuffersSource_DIR}/src/util.cpp")
-
-# externals - absl
-file(GLOB_RECURSE ABSL_SRCS "${TFLiteVanillaAbslSource_DIR}/absl/*.cc")
-file(GLOB_RECURSE ABSL_EXCLS "${TFLiteVanillaAbslSource_DIR}/absl/*test*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/*benchmark*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/synchronization/*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/debugging/*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/hash/*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/flags/*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/random/*.cc")
-list(REMOVE_ITEM ABSL_SRCS ${ABSL_EXCLS})
-list(APPEND TFLITE_SRCS ${ABSL_SRCS})
-
-# externals - ruy
-file(GLOB RUY_SRCS "${TFLiteVanillaRuySource_DIR}/ruy/*.cc")
-file(GLOB_RECURSE RUY_EXCLS "${TFLiteVanillaRuySource_DIR}/ruy/*test*.cc"
- "${TFLiteVanillaRuySource_DIR}/ruy/*benchmark*.cc"
- "${TFLiteVanillaRuySource_DIR}/ruy/*example*.cc")
-list(REMOVE_ITEM RUY_SRCS ${RUY_EXCLS})
-# Temporary fix for ruy compilation error.
-# TODO(b/158800055): Remove this hack once the ruy version is correctly bumped.
-list(REMOVE_ITEM RUY_SRCS "${TFLiteVanillaRuySource_DIR}/ruy/prepare_packed_matrices.cc")
-list(APPEND TFLITE_SRCS ${RUY_SRCS})
-
-
-# Build with mmap? true
-# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch.
-set(BUILD_WITH_MMAP TRUE)
-if(${BUILD_WITH_MMAP})
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc")
-else()
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation.cc")
-endif()
-
-# Build with nnapi? true
-# caution: this nnapi delegate comes from tflite, not ours.
-set(BUILD_WITH_NNAPI TRUE)
-if(${BUILD_WITH_NNAPI})
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/quant_lstm_sup.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_util.cc")
-else()
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate_disabled.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation_disabled.cc")
-endif()
-
-# ios: we don't support ios
-list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_ios.cc")
-
-# android
-if(NOT ANDROID)
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_android.cc")
-endif()
-
-# exclude some source files
-file(GLOB_RECURSE TFLITE_EXCLS "${TENSORFLOW_LITE_BASE}/*test*.cc"
- "${TENSORFLOW_LITE_BASE}/*benchmark*.cc"
- "${TENSORFLOW_LITE_BASE}/*example*.cc"
- "${TENSORFLOW_LITE_BASE}/*tool*.cc")
-list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS})
-
-# include headers
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaTensorFlowSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaEigenSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaAbslSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaGEMMLowpSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaNEON2SSESource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFarmhashSource_DIR}/src")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFlatBuffersSource_DIR}/include")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFP16Source_DIR}/include")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaRuySource_DIR}")
-
-add_library(tensorflow-lite-2.3.0 STATIC ${TFLITE_SRCS})
-target_include_directories(tensorflow-lite-2.3.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
-target_include_directories(tensorflow-lite-2.3.0 PRIVATE ${CpuInfoSource_DIR})
-target_compile_definitions(tensorflow-lite-2.3.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO")
-set_property(TARGET tensorflow-lite-2.3.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(tensorflow-lite-2.3.0 eigen ${LIB_PTHREAD} dl cpuinfo)
-if(NOT ANDROID AND ${BUILD_WITH_NNAPI})
- target_link_libraries(tensorflow-lite-2.3.0 rt)
-endif()
-
-if(ANDROID)
- target_link_libraries(tensorflow-lite-2.3.0 log)
- target_include_directories(tensorflow-lite-2.3.0 PUBLIC "${NDK_DIR}/..")
-endif()
--- /dev/null
+# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
+#
+# Tensorflow Lite library 2.3.0
+#
+set(TENSORFLOW_LITE_BASE ${TensorFlowSource_DIR}/tensorflow/lite)
+
+file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c"
+ "${TENSORFLOW_LITE_BASE}/*.cc"
+ "${TENSORFLOW_LITE_BASE}/core/*.cc")
+
+file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
+
+file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
+
+file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c"
+ "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
+
+list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc")
+list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc")
+
+file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc")
+
+file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc")
+
+list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_EXPERIMENTAL_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS})
+
+# externals
+list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg.c")
+list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg2d.c")
+
+# Build with mmap? true
+# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch.
+set(BUILD_WITH_MMAP TRUE)
+if(${BUILD_WITH_MMAP})
+ list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc")
+else()
+ list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation.cc")
+endif()
+
+# Build with nnapi? true
+# caution: this nnapi delegate comes from tflite, not ours.
+set(BUILD_WITH_NNAPI TRUE)
+if(${BUILD_WITH_NNAPI})
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/quant_lstm_sup.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_util.cc")
+else()
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate_disabled.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation_disabled.cc")
+endif()
+
+# ios: we don't support ios
+list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_ios.cc")
+
+# android
+if(NOT ANDROID)
+ list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_android.cc")
+endif()
+
+# exclude some source files
+file(GLOB_RECURSE TFLITE_EXCLS "${TENSORFLOW_LITE_BASE}/*test*.cc"
+ "${TENSORFLOW_LITE_BASE}/*benchmark*.cc"
+ "${TENSORFLOW_LITE_BASE}/*example*.cc"
+ "${TENSORFLOW_LITE_BASE}/*tool*.cc")
+list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS})
+
+# include headers
+list(APPEND TFLITE_INCLUDES "${TensorFlowSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${TensorFlowGEMMLowpSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${Fp16Source_DIR}/include")
+
+if(NEON2SSESource_FOUND)
+ list(APPEND TFLITE_INCLUDES "${NEON2SSESource_DIR}")
+endif(NEON2SSESource_FOUND)
+
+add_library(tensorflow-lite-2.3.0 STATIC ${TFLITE_SRCS})
+target_include_directories(tensorflow-lite-2.3.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
+target_include_directories(tensorflow-lite-2.3.0 PRIVATE ${CpuInfoSource_DIR})
+target_compile_definitions(tensorflow-lite-2.3.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO")
+set_property(TARGET tensorflow-lite-2.3.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(tensorflow-lite-2.3.0 eigen flatbuffers::flatbuffers ruy abseil farmhash ${LIB_PTHREAD} dl)
+if(NOT ANDROID AND ${BUILD_WITH_NNAPI})
+ target_link_libraries(tensorflow-lite-2.3.0 rt)
+endif()
+
+if(ANDROID)
+ target_link_libraries(tensorflow-lite-2.3.0 log)
+ target_include_directories(tensorflow-lite-2.3.0 PUBLIC "${NDK_DIR}/..")
+endif()
--- /dev/null
+if(BUILD_TENSORFLOW_LITE_2_3_0)
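+  # Helper: if the given variable is not set, report it, mark TensorFlowLite 2.3.0
+  # as not found, and stop processing this package config.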
+ macro(return_unless VAR)
+ if(NOT ${VAR})
+ message("TFLiteVanillaRun: ${VAR} NOT TRUE")
+ set(TensorFlowLite_2_3_0_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${VAR})
+ endmacro(return_unless)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
+ return_unless(TensorFlowSource_FOUND)
+
+ # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/tensorflow/workspace.bzl
+ nnas_find_package(AbseilSource QUIET)
+ return_unless(AbseilSource_FOUND)
+ nnfw_find_package(Eigen QUIET)
+ return_unless(Eigen_FOUND)
+ nnas_find_package(Farmhash QUIET)
+ return_unless(Farmhash_FOUND)
+ nnfw_find_package(FlatBuffers QUIET)
+ return_unless(FlatBuffers_FOUND)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
+ return_unless(TensorFlowGEMMLowpSource_FOUND)
+ nnas_find_package(OouraFFTSource QUIET)
+ return_unless(OouraFFTSource_FOUND)
+ nnfw_find_package(Ruy QUIET)
+ return_unless(Ruy_FOUND)
+
+ # TensorFlow Lite requires FP16 library's header only
+ nnas_find_package(Fp16Source QUIET)
+ return_unless(Fp16Source_FOUND)
+
+ # Optional packages
+ nnas_find_package(NEON2SSESource QUIET)
+
+ nnas_include(ExternalProjectTools)
+ add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite" tflite-2.3.0)
+
+ set(TensorFlowLite_2_3_0_FOUND TRUE)
+ return()
+endif()
--- /dev/null
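+# Version file for the TensorFlowLite 2.3.0 package: only an exact 2.3.0 request is accepted.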
+set(PACKAGE_VERSION "2.3.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+++ /dev/null
-if(BUILD_TENSORFLOW_LITE_2_3_0)
- macro(return_unless VAR)
- if(NOT ${VAR})
- message("${VAR} NOT TRUE")
- set(TensorFlowLite_2_3_0_FOUND PARENT_SCOPE)
- return()
- endif(NOT ${VAR})
- endmacro(return_unless)
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
-
- set(absl_url "https://github.com/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz")
- ExternalSource_Download("TFLiteVanilla_Absl" ${absl_url})
- set(TFLiteVanillaAbslSource_DIR "${TFLiteVanilla_Absl_SOURCE_DIR}")
- if (NOT TFLiteVanillaAbslSource_DIR STREQUAL "")
- set(TFLiteVanillaAbslSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaAbslSource_FOUND)
-
- set(eigen_url "https://gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
- ExternalSource_Download("TFLiteVanilla_Eigen" ${eigen_url})
- set(TFLiteVanillaEigenSource_DIR "${TFLiteVanilla_Eigen_SOURCE_DIR}")
- if (NOT TFLiteVanillaEigenSource_DIR STREQUAL "")
- set(TFLiteVanillaEigenSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaEigenSource_FOUND)
-
- set(farmhash_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz")
- ExternalSource_Download("TFLiteVanilla_Farmhash" ${farmhash_url})
- set(TFLiteVanillaFarmhashSource_DIR "${TFLiteVanilla_Farmhash_SOURCE_DIR}")
- if (NOT TFLiteVanillaFarmhashSource_DIR STREQUAL "")
- set(TFLiteVanillaFarmhashSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaFarmhashSource_FOUND)
-
- set(fft2d_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/petewarden/OouraFFT/archive/v1.0.tar.gz")
- ExternalSource_Download("TFLiteVanilla_FFT2D" ${fft2d_url})
- set(TFLiteVanillaFFT2DSource_DIR "${TFLiteVanilla_FFT2D_SOURCE_DIR}")
- if (NOT TFLiteVanillaFFT2DSource_DIR STREQUAL "")
- set(TFLiteVanillaFFT2DSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaFFT2DSource_FOUND)
-
- set(flatbuffers_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.12.0.tar.gz")
- ExternalSource_Download("TFLiteVanilla_FlatBuffers" ${flatbuffers_url})
- set(TFLiteVanillaFlatBuffersSource_DIR "${TFLiteVanilla_FlatBuffers_SOURCE_DIR}")
- if (NOT TFLiteVanillaFlatBuffersSource_DIR STREQUAL "")
- set(TFLiteVanillaFlatBuffersSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaFlatBuffersSource_FOUND)
-
- set(fp16_url "https://github.com/Maratyszcza/FP16/archive/4dfe081cf6bcd15db339cf2680b9281b8451eeb3.zip")
- ExternalSource_Download("TFLiteVanilla_FP16" ${fp16_url})
- set(TFLiteVanillaFP16Source_DIR "${TFLiteVanilla_FP16_SOURCE_DIR}")
- if (NOT TFLiteVanillaFP16Source_DIR STREQUAL "")
- set(TFLiteVanillaFP16Source_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaFP16Source_FOUND)
-
- set(gemmlowp_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip")
- ExternalSource_Download("TFLiteVanilla_GEMMLowp" ${gemmlowp_url})
- set(TFLiteVanillaGEMMLowpSource_DIR "${TFLiteVanilla_GEMMLowp_SOURCE_DIR}")
- if (NOT TFLiteVanillaGEMMLowpSource_DIR STREQUAL "")
- set(TFLiteVanillaGEMMLowpSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaGEMMLowpSource_FOUND)
-
- set(neon2sse_url "https://github.com/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee60148671a786a71f.tar.gz")
- ExternalSource_Download("TFLiteVanilla_NEON2SSE" ${neon2sse_url})
- set(TFLiteVanillaNEON2SSESource_DIR "${TFLiteVanilla_NEON2SSE_SOURCE_DIR}")
- if (NOT TFLiteVanillaNEON2SSESource_DIR STREQUAL "")
- set(TFLiteVanillaNEON2SSESource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaNEON2SSESource_FOUND)
-
- set(tensorflow_url "https://github.com/tensorflow/tensorflow/archive/v2.3.0.tar.gz")
- ExternalSource_Download("TFLiteVanilla_TensorFlow" ${tensorflow_url})
- set(TFLiteVanillaTensorFlowSource_DIR "${TFLiteVanilla_TensorFlow_SOURCE_DIR}")
- if (NOT TFLiteVanillaTensorFlowSource_DIR STREQUAL "")
- set(TFLiteVanillaTensorFlowSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaTensorFlowSource_FOUND)
-
- set(ruy_url "https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip")
- ExternalSource_Download("TFLiteVanilla_Ruy" ${ruy_url})
- set(TFLiteVanillaRuySource_DIR "${TFLiteVanilla_Ruy_SOURCE_DIR}")
- if (NOT TFLiteVanillaRuySource_DIR STREQUAL "")
- set(TFLiteVanillaRuySource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaRuySource_FOUND)
-
- nnfw_find_package(CpuInfo QUIET)
- if (NOT CpuInfo_FOUND)
- message(STATUS "TFLiteVanillaRun: CPUINFO not found")
- set(TensorFlowLite_2_3_0_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT CpuInfo_FOUND)
-
- nnas_include(ExternalProjectTools)
- add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite-2.3.0" tflite-2.3.0)
-
- set(TensorFlowLite_2_3_0_FOUND TRUE)
- return()
-endif()
set(Xnnpack_FOUND TRUE PARENT_SCOPE)
endfunction(_Xnnpack_Build)
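+# Strip -flto from the inherited C/C++ flags so the XNNPACK sources are built
+# without link-time optimization (assumed to avoid LTO-related build issues).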
+string(REGEX REPLACE "-flto" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+string(REGEX REPLACE "-flto" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+
if(BUILD_XNNPACK)
_Xnnpack_Build()
else(BUILD_XNNPACK)
fi
# The default preset
-PRESET="20200630"
+PRESET="20210406"
EXTRA_OPTIONS=()
while [ "$#" -ne 0 ]; do
{
REQUIRED_UNITS=()
# Common Libraries
- REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp" "stdex")
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
REQUIRED_UNITS+=("oops" "pepper-assert" "foder")
REQUIRED_UNITS+=("souschef")
REQUIRED_UNITS+=("safemain")
{
REQUIRED_UNITS=()
# Common Libraries
- REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp" "stdex")
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
REQUIRED_UNITS+=("oops" "pepper-assert" "foder")
REQUIRED_UNITS+=("souschef")
REQUIRED_UNITS+=("safemain")
--- /dev/null
+#!/bin/bash
+
+# NOTE The purpose of this file is static analysis only.
+#      A new official preset will be added when the new programs are ready.
+
+PRESET="20210406"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-circle")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
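+  # NOTE EXTRA_OPTIONS and the "join_by" helper are expected to be provided by the
+  # packaging script that sources this preset.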
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
--- /dev/null
+#!/bin/bash
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-circle")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -G "MSYS Makefiles" \
+ -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DENABLE_TEST=OFF \
+ -DDOWNLOAD_GTEST=OFF \
+ -DBUILD_GTEST=OFF \
+ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install libraries to bin/ for Windows release
+ mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+ rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.20210406" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+  # NOTE 'tf2tfliteV2' requires TensorFlow, but TensorFlow cannot be installed
+  # under MinGW. Install TensorFlow into a Python virtual environment from a
+  # native Windows CMD (run as administrator), then copy that environment to
+  # "${NNAS_INSTALL_PREFIX}/bin/venv".
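+  # For example (hypothetical paths, assuming Python 3 is available in CMD):
+  #   python -m venv C:\tf2nnpkg-venv
+  #   C:\tf2nnpkg-venv\Scripts\activate
+  #   pip install tensorflow
+  #   ... then copy C:\tf2nnpkg-venv to "${NNAS_INSTALL_PREFIX}/bin/venv"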
+}
"${ROOT}/bin/tflite2circle" "${TMPDIR}/${MODEL_NAME}.tflite" "${TMPDIR}/${MODEL_NAME}.tmp.circle"
# optimize
-"${ROOT}/bin/circle2circle" --all "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
--- /dev/null
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+ if [ "$#" -le 0 ]; then
+ return 1
+ fi
+ command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+ echo "Convert TensorFlow model to nnpackage."
+ echo "Usage: tf2nnpkg"
+ echo " --info <path/to/info>"
+ echo " --graphdef <path/to/pb>"
+ echo " -o <path/to/nnpkg/directory>"
+ echo " --v2 (optional) Use TF 2.x interface"
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--info')
+ export INFO_FILE="$2"
+ shift 2
+ ;;
+ '--graphdef')
+ export GRAPHDEF_FILE="$2"
+ shift 2
+ ;;
+ '-o')
+ export OUTPUT_DIR="$2"
+ shift 2
+ ;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
+ *)
+ echo "${CUR}"
+ shift
+ ;;
+ esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+ echo "pb is not found. Please check --graphdef is correct."
+ exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+ echo "info is not found. Please check --info is correct."
+ exit 2
+fi
+
+if [ -z ${OUTPUT_DIR} ]; then
+ echo "output directory is not specifed. Please check -o is correct.."
+ exit 2
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+ source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+ source ${VIRTUALENV_WINDOWS}
+fi
+
+# parse inputs, outputs from info file
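+# Each line of the info file is assumed to look like: "input, <name>:0, [<d0>, <d1>, ...]"
+# (and similarly for "output, ..."); only the tensor name and the shape are used here.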
+INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+
+# Generate BCQ information metadata
+# If model has no BCQ information or invalid information, pb file is not changed.
+"${ROOT}/bin/generate_bcq_metadata" \
+--input_path "${GRAPHDEF_FILE}" \
+--output_path "${TMPDIR}/${MODEL_NAME}_withmeta.pb" \
+--output_arrays "${OUTPUT}"
+
+# Generate BCQ information nodes as output_arrays
+# If model has no BCQ information, output_arrays would be empty.
+"${ROOT}/bin/generate_bcq_output_arrays" \
+--input_path "${TMPDIR}/${MODEL_NAME}_withmeta.pb" \
+--metadata_path "${TMPDIR}/${MODEL_NAME}_metadata_arrays.txt" \
+--output_arrays_path "${TMPDIR}/${MODEL_NAME}_output_arrays.txt"
+
+# generate tflite file
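+# NOTE The converter's --output_arrays is the concatenation of the generated BCQ
+# metadata arrays (if any), the user-specified outputs, and the generated BCQ
+# output arrays (if any).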
+TF2TFLITE_CONVERT_SCRIPT="python ${ROOT}/bin/tf2tfliteV2.py ${TF_INTERFACE} "
+TF2TFLITE_CONVERT_SCRIPT+="--input_path ${TMPDIR}/${MODEL_NAME}_withmeta.pb "
+TF2TFLITE_CONVERT_SCRIPT+="--input_arrays ${INPUT} "
+TF2TFLITE_CONVERT_SCRIPT+="--output_path ${TMPDIR}/${MODEL_NAME}.tflite "
+TF2TFLITE_CONVERT_SCRIPT+="--output_arrays "
+TF2TFLITE_CONVERT_SCRIPT+="$(cat ${TMPDIR}/${MODEL_NAME}_metadata_arrays.txt)"
+TF2TFLITE_CONVERT_SCRIPT+="${OUTPUT}"
+TF2TFLITE_CONVERT_SCRIPT+="$(cat ${TMPDIR}/${MODEL_NAME}_output_arrays.txt) "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ TF2TFLITE_CONVERT_SCRIPT+="--input_shapes ${INPUT_SHAPES} "
+fi
+
+${TF2TFLITE_CONVERT_SCRIPT}
+
+# convert .tflite to .circle
+"${ROOT}/bin/tflite2circle" "${TMPDIR}/${MODEL_NAME}.tflite" "${TMPDIR}/${MODEL_NAME}.tmp.circle"
+
+# optimize
+"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
# prepare pre-built armcompute library
# android build requires pre-built armcompute library
-if [ ! -n "$EXT_ACL_FOLDER" ]; then
- echo "Please set EXT_ACL_FOLDER to use pre-built armcompute library"
- exit 1
-fi
+# if [ ! -n "$EXT_ACL_FOLDER" ]; then
+# echo "Please set EXT_ACL_FOLDER to use pre-built armcompute library"
+# exit 1
+# fi
+
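+# Build without a pre-built armcompute library: clear EXT_ACL_FOLDER so a value
+# from the environment is not picked up.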
+unset EXT_ACL_FOLDER
# prepare ndk
if [ ! -n "$NDK_DIR" ]; then
export BACKENDS=$1
if [[ "$2" == "" ]]; then
- $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
+ $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
--reportdir=$ROOT_PATH/$3
else
- $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
+ $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
--list=$2 \
--reportdir=$ROOT_PATH/$3
fi
# $2: (required) test list file relative path from nnfw root directory
# pass empty string if there is no skiplist
# $3: (required) relative path to report from nnfw root directory
-function TFLiteLoaderTest()
+function NNAPIFrontendTest()
{
- [[ $# -ne 3 ]] && echo "TFLiteLoaderTest: Invalid function argument setting" && exit 1
+ [[ $# -ne 3 ]] && echo "NNAPIFrontendTest: Invalid function argument setting" && exit 1
pushd ${ROOT_PATH} > /dev/null
export BACKENDS=$1
if [[ "$2" == "" ]]; then
- $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
+ $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
--reportdir=$ROOT_PATH/$3
else
- $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
+ $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
--list=$2 \
--reportdir=$ROOT_PATH/$3
fi
# Don't run this script
[[ "${BASH_SOURCE[0]}" == "${0}" ]] && echo "Please don't execute ${BASH_SOURCE[0]}, source it" && return
-DEBUG_BUILD_ITEMS="angkor;cwrap;pepper-str;pepper-strcast;pp;stdex"
+DEBUG_BUILD_ITEMS="angkor;cwrap;pepper-str;pepper-strcast;pp"
DEBUG_BUILD_ITEMS+=";oops;pepper-assert"
DEBUG_BUILD_ITEMS+=";hermes;hermes-std"
DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo"
-DEBUG_BUILD_ITEMS+=";foder;souschef;arser;vconone"
+DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone"
DEBUG_BUILD_ITEMS+=";safemain;mio-circle;mio-tflite"
DEBUG_BUILD_ITEMS+=";tflite2circle"
DEBUG_BUILD_ITEMS+=";luci"
DEBUG_BUILD_ITEMS+=";luci-interpreter"
-DEBUG_BUILD_ITEMS+=";luci-value-test"
+DEBUG_BUILD_ITEMS+=";luci-eval-driver;luci-pass-value-test;luci-value-test"
DEBUG_BUILD_ITEMS+=";circle2circle;record-minmax;circle-quantizer"
+DEBUG_BUILD_ITEMS+=";circle-partitioner;circle-part-driver"
DEBUG_BUILD_ITEMS+=";circle-verify"
+DEBUG_BUILD_ITEMS+=";circle-tensordump"
DEBUG_BUILD_ITEMS+=";tflchef;circlechef"
DEBUG_BUILD_ITEMS+=";common-artifacts"
DEBUG_BUILD_ITEMS+=";circle2circle-dredd-recipe-test"
DEBUG_BUILD_ITEMS+=";record-minmax-conversion-test"
DEBUG_BUILD_ITEMS+=";tf2tfliteV2;tf2tfliteV2-conversion-test"
DEBUG_BUILD_ITEMS+=";tflite2circle-conversion-test"
+DEBUG_BUILD_ITEMS+=";pota-quantization-value-test"
+DEBUG_BUILD_ITEMS+=";circle-part-value-test"
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
./nncc docker-run ./nnas create-package --prefix "${PWD}/${NNCC_INSTALL_PREFIX}" -- "${CONFIG_OPTIONS}"
mkdir -p ${ARCHIVE_PATH}
-tar -zcf ${ARCHIVE_PATH}/nncc-package.tar.gz -C ${NNCC_INSTALL_PREFIX} --exclude test ./
+tar -zcf ${ARCHIVE_PATH}/nncc-package.tar.gz -C ${NNCC_INSTALL_PREFIX} --exclude test --exclude tflchef* ./
tar -zcf ${ARCHIVE_PATH}/nncc-test-package.tar.gz -C ${NNCC_INSTALL_PREFIX} ./test
popd > /dev/null
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
REQUIRED_UNITS=()
# Common Libraries
-REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp" "stdex")
-REQUIRED_UNITS+=("oops" "safemain" "foder" "arser" "vconone")
+REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+REQUIRED_UNITS+=("oops" "safemain" "foder" "crew" "arser" "vconone")
# Hermes Logging Framework
REQUIRED_UNITS+=("hermes" "hermes-std")
# loco IR and related utilities
ROOT_PATH="$CURRENT_PATH/../../"
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
export GCOV_PREFIX_STRIP=`cat $ROOT_PATH/tests/scripts/build_path_depth.txt`
-./infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --tflite-loader
+TENSOR_LOGGING=trace_log.txt ./infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --nnapi-frontend
./infra/scripts/test_ubuntu_runtime.sh --backend acl_neon
./infra/scripts/test_ubuntu_runtime.sh --backend cpu
# Enable all logs (mixed backend)
-TENSOR_LOGGING=trace_log.txt ONERT_LOG_ENABLE=1 GRAPH_DOT_DUMP=1 ./infra/scripts/test_ubuntu_runtime_mixed.sh
+ONERT_LOG_ENABLE=1 GRAPH_DOT_DUMP=1 ./infra/scripts/test_ubuntu_runtime_mixed.sh
# Enable trace event (acl_cl default backend)
export TRACE_FILEPATH=trace.json
-TFLiteModelVerification "acl_cl" "Product/out/test/list/frameworktest_list.armv7l.acl_cl.txt" "report/acl_cl/trace"
+TFLiteModelVerification "acl_cl" "Product/out/test/list/tflite_comparator.armv7l.acl_cl.list" "report/acl_cl/trace"
unset TRACE_FILEPATH
# Interpreter
BACKEND="cpu"
TEST_OS="linux"
TEST_PLATFORM="$TEST_ARCH-$TEST_OS"
-TFLITE_LOADER="0"
+TFLITE_LOADER="1"
LINEAR_ONLY="0"
RUN_INTERP="0"
+NNAPI_FRONTEND="0"
function Usage()
{
echo ""
echo "Options:"
echo " --backend <BACKEND> Runtime backend to test (default: ${BACKEND})"
- echo " --tflite-loader Enable TFLite Loader test"
+ echo " --nnapi-frontend NNAPI Frontend test"
echo " --linear-only Use Linear executor only"
}
;;
--tflite-loader)
TFLITE_LOADER="1"
+ NNAPI_FRONTEND="1" # For CI test
+ echo "[INFO] \"--tflite-loader\" argument is deprecated"
+ shift
+ ;;
+ --nnapi-frontend)
+ NNAPI_FRONTEND="1"
shift
;;
--linear-only)
fi
UNITTEST_SKIPLIST="Product/out/unittest/nnapi_gtest.skip.${TEST_PLATFORM}.${BACKEND}"
-FRAMEWORK_TESTLIST="Product/out/test/list/frameworktest_list.${TEST_ARCH}.${BACKEND}.txt"
+TFLITE_TESTLIST="Product/out/test/list/tflite_comparator.${TEST_ARCH}.${BACKEND}.list"
REPORT_BASE="report/${BACKEND}"
EXECUTORS=("Linear" "Dataflow" "Parallel")
fi
NNAPIGTest "${BACKEND}" "${UNITTEST_SKIPLIST}" "${REPORT_PATH}"
- TFLiteModelVerification "${BACKEND}" "${FRAMEWORK_TESTLIST}" "${REPORT_PATH}"
+ TFLiteModelVerification "${BACKEND}" "${TFLITE_TESTLIST}" "${REPORT_PATH}"
if [ $EXECUTOR = "Interpreter" ]; then
unset DISABLE_COMPILE
fi
done
-# Current support acl_cl backend testlist only
# TODO Support more backends
-TFLITE_LOADER_TESTLIST="Product/out/test/list/tflite_loader_list.${TEST_ARCH}.txt"
-if [[ $TFLITE_LOADER = "1" ]]; then
- TFLiteLoaderTest "${BACKEND}" "${TFLITE_LOADER_TESTLIST}" "${REPORT_BASE}/loader/${EXECUTOR}"
+NNAPI_FRONTEND_TESTLIST="Product/out/test/list/nnapi_test.${TEST_ARCH}.list"
+if [[ $NNAPI_FRONTEND = "1" ]]; then
+ NNAPIFrontendTest "${BACKEND}" "${NNAPI_FRONTEND_TESTLIST}" "${REPORT_BASE}/nnapi/${EXECUTOR}"
fi
BACKENDS=(acl_cl acl_neon cpu)
# Get the intersect of framework test list files
-TESTLIST_PREFIX="Product/out/test/list/frameworktest_list.${TEST_ARCH}"
+TESTLIST_PREFIX="Product/out/test/list/tflite_comparator.${TEST_ARCH}"
SKIPLIST_PREFIX="Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}"
-sort $TESTLIST_PREFIX.${BACKENDS[0]}.txt > $TESTLIST_PREFIX.intersect.txt
+sort $TESTLIST_PREFIX.${BACKENDS[0]}.list > $TESTLIST_PREFIX.intersect.list
sort $SKIPLIST_PREFIX.${BACKENDS[0]} > $SKIPLIST_PREFIX.union
for BACKEND in "${BACKENDS[@]:1}"; do
- comm -12 <(sort $TESTLIST_PREFIX.intersect.txt) <(sort $TESTLIST_PREFIX.$BACKEND.txt) > $TESTLIST_PREFIX.intersect.next.txt
+ comm -12 <(sort $TESTLIST_PREFIX.intersect.list) <(sort $TESTLIST_PREFIX.$BACKEND.list) > $TESTLIST_PREFIX.intersect.next.list
comm <(sort $SKIPLIST_PREFIX.union) <(sort $SKIPLIST_PREFIX.$BACKEND) | tr -d "[:blank:]" > $SKIPLIST_PREFIX.union.next
- mv $TESTLIST_PREFIX.intersect.next.txt $TESTLIST_PREFIX.intersect.txt
+ mv $TESTLIST_PREFIX.intersect.next.list $TESTLIST_PREFIX.intersect.list
mv $SKIPLIST_PREFIX.union.next $SKIPLIST_PREFIX.union
done
popd > /dev/null
export ACL_LAYOUT="NCHW"
export RUY_THREADS=4
NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
-TFLiteModelVerification "acl_cl;acl_neon;cpu" "${TESTLIST_PREFIX}.intersect.txt" "report/mixed"
+TFLiteModelVerification "acl_cl;acl_neon;cpu" "${TESTLIST_PREFIX}.intersect.list" "report/mixed"
$SDB_CMD push cache.tar.gz $TEST_ROOT/.
rm -rf cache.tar.gz
$SDB_CMD shell tar -zxf $TEST_ROOT/cache.tar.gz -C $TEST_ROOT/Product/out/test/models
-
- # download api test model file for nnfw_api_gtest
- MODEL_CACHE_DIR=$(mktemp -d)
- tests/scripts/models/run_test.sh --download=on --run=off \
- --configdir=tests/scripts/models/nnfw_api_gtest \
- --cachedir=$MODEL_CACHE_DIR
- tar -zcf $MODEL_CACHE_DIR/api_model_test.tar.gz -C $MODEL_CACHE_DIR .
- $SDB_CMD push $MODEL_CACHE_DIR/api_model_test.tar.gz $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/
- $SDB_CMD shell tar -zxf $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/api_model_test.tar.gz \
- -C $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/
- rm -rf $MODEL_CACHE_DIR
popd
}
--- /dev/null
+# NNPackage example
+
+## Package version 1.1.0
+
+### one_op_in_tflite
+
+- Model file: TensorFlow Lite model
+- Only one `ADD` operation
+- Includes a `config.cfg` referenced from the MANIFEST `configs` field (new in package version 1.1.0)
+
+## Package version 1.0.0
+
+### add
+
+- Model file: TensorFlow Lite model
+- Only one `ADD` operation
+
+### add_invalid_manifest
+
+- Model file: TensorFlow Lite model
+- Only one `ADD` operation
+- Invalid manifest: invalid JSON format
+
+### if_dynamic
+
+- Model file: TensorFlow Lite model
+- `IF` operation example with sample input and output data
+
+### while_dynamic
+
+- Model file: TensorFlow Lite model
+- `WHILE` operation example with sample input and output data
+
+++ /dev/null
-{
- "major-version" : "1",
- "minor-version" : "1",
- "patch-version" : "0",
- "configs" : [ "config.cfg" ],
- "models" : [ "add.tflite" ],
- "model-types" : [ "tflite" ]
-}
+++ /dev/null
-BACKENDS="cpu"
--- /dev/null
+{
+ "major-version" : "1",
+ "minor-version" : "0",
+ "patch-version" : "0",
+ "models" : [ "add.tflite" ],
+ "model-types" : [ "tflite" ]
+}
--- /dev/null
+{
+ "major-version" : "1"
+ "minor-version" : "0"
+ "patch-version" : "0"
+ "models" : [ "add.tflite" ]
+ "model-types" : [ "tflite" ]
+}
--- /dev/null
+{
+ "major-version" : "1",
+ "minor-version" : "0",
+ "patch-version" : "0",
+ "models" : [ "if_dynamic.tflite" ],
+ "model-types" : [ "tflite" ]
+}
--- /dev/null
+{
+ "major-version" : "1",
+ "minor-version" : "0",
+ "patch-version" : "0",
+ "models" : [ "while_dynamic.tflite" ],
+ "model-types" : [ "tflite" ]
+}
--- /dev/null
+{
+ "major-version" : "1",
+ "minor-version" : "1",
+ "patch-version" : "0",
+ "configs" : [ "config.cfg" ],
+ "models" : [ "add.tflite" ],
+ "model-types" : [ "tflite" ]
+}
--- /dev/null
+BACKENDS="cpu"
Name: nnfw
Summary: nnfw
-Version: 1.12.0
+Version: 1.15.0
Release: 1
Group: Development
License: Apache-2.0 and MIT and BSD-2-Clause
%ifarch aarch64
%define target_arch aarch64
%endif
+%ifarch %ix86
+%define target_arch i686
+%endif
%define install_dir %{_prefix}
%define install_path %{buildroot}%{install_dir}
tar -xf %{SOURCE1011} -C ./externals
%build
-%ifarch arm armv7l aarch64 x86_64
+%ifarch arm armv7l aarch64 x86_64 %ix86
# runtime build
%{build_env} ./nnfw configure %{build_options} %{extra_option}
%{build_env} ./nnfw build -j4
%endif # arm armv7l aarch64
%install
-%ifarch arm armv7l aarch64 x86_64
+%ifarch arm armv7l aarch64 x86_64 %ix86
mkdir -p %{buildroot}%{_libdir}
mkdir -p %{buildroot}%{_bindir}
%files
%manifest %{name}.manifest
%defattr(-,root,root,-)
-%ifarch arm armv7l aarch64 x86_64
+%ifarch arm armv7l aarch64 x86_64 %ix86
%{_libdir}/*.so
%endif
%files devel
%manifest %{name}.manifest
%defattr(-,root,root,-)
-%ifarch arm armv7l aarch64 x86_64
+%ifarch arm armv7l aarch64 x86_64 %ix86
%dir %{_includedir}/nnfw
%{_includedir}/nnfw/*
%{_libdir}/pkgconfig/nnfw.pc
%files plugin-devel
%manifest %{name}.manifest
%defattr(-,root,root,-)
-%ifarch arm armv7l aarch64 x86_64
+%ifarch arm armv7l aarch64 x86_64 %ix86
%dir %{_includedir}/onert
%{_includedir}/onert/*
%{_libdir}/pkgconfig/nnfw-plugin.pc
%endif
-%ifarch arm armv7l aarch64 x86_64
+%ifarch arm armv7l aarch64 x86_64 %ix86
%files minimal-app
%manifest %{name}.manifest
%defattr(-,root,root,-)
--- /dev/null
+operand {
+ name: "bc_input"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "bc_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "bc_ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "BroadcastTo"
+ input: "bc_input"
+ input: "bc_shape"
+ output: "bc_ofm"
+}
+input: "bc_input"
+output: "bc_ofm"
--- /dev/null
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 }
+}
+
+operand {
+ name: "ifm2"
+ type: INT32
+ shape { }
+ filler {
+ tag: "constant"
+ arg: "-1"
+ }
+}
+
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 1 }
+}
+
+operation {
+ type: "ExpandDims"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+
+operation {
+ type: "FakeQuant"
+ fakequant_options {
+ min: 0.0
+ max: 1.0
+ num_bits: 8
+ narrow_range: false
+ }
+ input: "ifm"
+ output: "ofm"
+}
+
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "bc_input"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "bc_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "bc_ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "BroadcastTo"
+ input: "bc_input"
+ input: "bc_shape"
+ output: "bc_ofm"
+}
+operand {
+ name: "reshape_data"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "reshape_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "reshape_ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: 1
+ new_shape: 2
+ new_shape: 3
+ }
+ input: "reshape_data"
+ input: "reshape_shape"
+ output: "reshape_ofm"
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "AddV2"
+ input: "bc_ofm"
+ input: "reshape_ofm"
+ output: "ofm"
+}
+input: "bc_input"
+input: "reshape_data"
+output: "ofm"
--- /dev/null
+# To check if BroadcastTo and AddV2 are fused to Add op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "ADD_EXIST" $(op_count ADD) '=' 1
+RULE "NO_BroadcastTo" $(op_count 'CUSTOM(BroadcastTo)') '=' 0
+RULE "NO_AddV2" $(op_count 'CUSTOM(AddV2)') '=' 0
--- /dev/null
+operand {
+ name: "bc_input"
+ type: INT64
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "bc_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "bc_ofm"
+ type: INT64
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "BroadcastTo"
+ input: "bc_input"
+ input: "bc_shape"
+ output: "bc_ofm"
+}
+operand {
+ name: "reshape_data"
+ type: INT64
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "reshape_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "reshape_ofm"
+ type: INT64
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: 1
+ new_shape: 2
+ new_shape: 3
+ }
+ input: "reshape_data"
+ input: "reshape_shape"
+ output: "reshape_ofm"
+}
+operand {
+ name: "ofm"
+ type: INT64
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "AddV2"
+ input: "bc_ofm"
+ input: "reshape_ofm"
+ output: "ofm"
+}
+input: "bc_input"
+input: "reshape_data"
+output: "ofm"
--- /dev/null
+# To check if BroadcastTo and AddV2 are not fused to Add op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "BroadcastTo_EXIST" $(op_count 'CUSTOM(BroadcastTo)') '=' 1
+RULE "AddV2_EXIST" $(op_count 'CUSTOM(AddV2)') '=' 1
+RULE "NO_ADD" $(op_count ADD) '=' 0
--- /dev/null
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
--- /dev/null
+# To check if Add and Mul are fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
--- /dev/null
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: RELU
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
--- /dev/null
+# To check if Add(with RELU) and Mul are fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
--- /dev/null
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "mul_const"
+ input: "ofm_conv"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "add_const"
+ input: "ofm_mul"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
--- /dev/null
+# To check if Add and Mul with reverse input sequence are fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
--- /dev/null
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: RELU
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
--- /dev/null
+# To check if Add and Mul are not fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "MUL_EXIST" $(op_count MUL) '=' 1
+RULE "ADD_EXIST" $(op_count ADD) '=' 1
--- /dev/null
+operand {
+ name: "Placeholder"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Const_4"
+ type: FLOAT32
+ shape { }
+ filler { tag: "explicit" arg: "6" }
+}
+operand {
+ name: "Const_5"
+ type: FLOAT32
+ shape { }
+ filler { tag: "explicit" arg: "0" }
+}
+operand {
+ name: "Conv2D_1"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 3 dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_2"
+ type: FLOAT32
+ shape { dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_21"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 3 dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_11"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Minimum"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Maximum"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Conv2D_22"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Minimum_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Maximum_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operation {
+ type: "Conv2D"
+ input: "Placeholder"
+ input: "Conv2D_1"
+ input: "Conv2D_2"
+ output: "Conv2D_11"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "Minimum"
+ input: "Conv2D_11"
+ input: "Const_4"
+ output: "Minimum"
+}
+operation {
+ type: "Maximum"
+ input: "Minimum"
+ input: "Const_5"
+ output: "Maximum"
+}
+operation {
+ type: "Conv2D"
+ input: "Maximum"
+ input: "Conv2D_21"
+ input: "Conv2D_2"
+ output: "Conv2D_22"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "Minimum"
+ input: "Conv2D_22"
+ input: "Const_4"
+ output: "Minimum_1"
+}
+operation {
+ type: "Maximum"
+ input: "Minimum_1"
+ input: "Const_5"
+ output: "Maximum_1"
+}
+input: "Placeholder"
+output: "Maximum_1"
--- /dev/null
+# To check if Minimum and Maximum are converted to Relu6 op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 2
+RULE "RELU6_EXIST" $(op_count RELU6) '=' 2
+RULE "MIN_NOT_EXIST" $(op_count MINUMUM) '=' 0
+RULE "MAX_NOT_EXIST" $(op_count MAXIMUM) '=' 0
--- /dev/null
+operand {
+ name: "Placeholder"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Conv2D_1"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 3 dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_2"
+ type: FLOAT32
+ shape { dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_21"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 3 dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_11"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "ReLU6"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Conv2D_22"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "ReLU6_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operation {
+ type: "Conv2D"
+ input: "Placeholder"
+ input: "Conv2D_1"
+ input: "Conv2D_2"
+ output: "Conv2D_11"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "ReLU6"
+ input: "Conv2D_11"
+ output: "ReLU6"
+}
+operation {
+ type: "Conv2D"
+ input: "ReLU6"
+ input: "Conv2D_21"
+ input: "Conv2D_2"
+ output: "Conv2D_22"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "ReLU6"
+ input: "Conv2D_22"
+ output: "ReLU6_1"
+}
+input: "Placeholder"
+output: "ReLU6_1"
--- /dev/null
+# To check if ReLU6 is fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 2
+RULE "RELU6_NOT_EXIST" $(op_count RELU6) '=' 0
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "scale"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "shift"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "dwout"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "mulout"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ depth_multiplier: 1
+ activation : NONE
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "dwout"
+}
+operation {
+ type: "Mul"
+ input: "dwout"
+ input: "scale"
+ output: "mulout"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "mulout"
+ input: "shift"
+ output: "ofm"
+ add_options {
+ activation: RELU6
+ }
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check if BatchNorm op(mul + add) is fused to Depthwise Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "DWCONV_EXIST" $(op_count DEPTHWISE_CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "scale"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "shift"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "dwout"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "mulout"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ depth_multiplier: 1
+ activation : NONE
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "dwout"
+}
+operation {
+ type: "Mul"
+ input: "dwout"
+ input: "scale"
+ output: "mulout"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "mulout"
+ input: "shift"
+ output: "ofm"
+ add_options {
+ activation: RELU6
+ }
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check if BatchNorm op(mul + add) is fused to Depthwise Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "DWCONV_EXIST" $(op_count DEPTHWISE_CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
name: "sequential/instance_normalization/stack"
type: INT32
shape {
- dim: 5
+ dim: 4
}
filler {
tag: "explicit"
arg: "32"
arg: "32"
arg: "8"
- arg: "1"
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
filler {
tag: "explicit"
dim: 1
dim: 1
dim: 8
- dim: 1
}
filler {
tag: "explicit"
name: "sequential/instance_normalization/moments/variance/reduction_indices"
type: INT32
shape {
- dim: 3
+ dim: 2
}
filler {
tag: "explicit"
arg: "1"
arg: "2"
- arg: "4"
}
}
operand {
dim: 32
dim: 32
dim: 8
- dim: 1
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
dim: 32
dim: 32
dim: 8
- dim: 1
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
dim: 32
dim: 32
dim: 8
- dim: 1
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
dim: 32
dim: 32
dim: 8
- dim: 1
}
}
operand {
dim: 8
}
}
-operation {
- type: "Reshape"
- input: "input_layer"
- input: "sequential/instance_normalization/stack"
- output: "sequential/instance_normalization/Reshape"
-}
operation {
type: "Mean"
- input: "sequential/instance_normalization/Reshape"
+ input: "input_layer"
input: "sequential/instance_normalization/moments/variance/reduction_indices"
output: "sequential/instance_normalization/moments/mean"
mean_options {
}
operation {
type: "SquaredDifference"
- input: "sequential/instance_normalization/Reshape"
+ input: "input_layer"
input: "sequential/instance_normalization/moments/mean"
output: "sequential/instance_normalization/moments/SquaredDifference"
}
}
operation {
type: "Mul"
- input: "sequential/instance_normalization/Reshape"
+ input: "input_layer"
input: "sequential/instance_normalization/batchnorm/mul"
output: "sequential/instance_normalization/batchnorm/mul_1"
mul_options {
activation: NONE
}
}
-operation {
- type: "Reshape"
- input: "sequential/instance_normalization/batchnorm/add_1"
- input: "sequential/instance_normalization/Shape"
- output: "Identity"
-}
input: "input_layer"
-output: "Identity"
+output: "sequential/instance_normalization/batchnorm/add_1"
RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
RULE "INSTANCE_NORM_EXIST" $(op_count INSTANCE_NORM) '=' 1
-RULE "RESHAPE_EXIST" $(op_count RESHAPE) '=' 3
+RULE "RESHAPE_EXIST" $(op_count RESHAPE) '<=' 3
RULE "NO_ADD" $(op_count ADD) '=' 0
RULE "NO_MUL" $(op_count MUL) '=' 0
--- /dev/null
+operand {
+ name: "Const"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "6"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Const_1"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 4
+ }
+ quant {
+ min: 0
+ max: 255
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Maximum"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 4
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Minimum"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 4
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "Minimum"
+ input: "Hole"
+ input: "Const"
+ output: "Minimum"
+}
+operation {
+ type: "Maximum"
+ input: "Minimum"
+ input: "Const_1"
+ output: "Maximum"
+}
+input: "Hole"
+output: "Maximum"
--- /dev/null
+# To check if Maximum and Minimum are fused to Relu6.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "RELU6_EXIST" $(op_count RELU6) '=' 1
+RULE "NO_MAXIMUM" $(op_count MAXIMUM) '=' 0
+RULE "NO_MINIMUM" $(op_count MINIMUM) '=' 0
dim: 4
dim: 16
}
- filler {
- tag: "gaussian"
- arg: "0.0"
- arg: "0.1"
- }
}
operand {
name: "Weights1"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 dim: 6 }
+}
+operand {
+ name: "shape1"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "6" arg: "6" }
+}
+operand {
+ name: "reshape_out"
+ type: FLOAT32
+ shape { dim: 6 dim: 6 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 6 dim: 6 }
+}
+operation {
+ type: "Reshape"
+ input: "ifm"
+ input: "shape1"
+ output: "reshape_out"
+}
+operation {
+ type: "Neg"
+ input: "reshape_out"
+ output: "ofm"
+}
+
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 dim: 6 }
+}
+operand {
+ name: "shape1"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "6" arg: "6" }
+}
+operand {
+ name: "shape2"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "6" arg: "2" arg: "3" }
+}
+operand {
+ name: "reshape_out"
+ type: FLOAT32
+ shape { dim: 6 dim: 6 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 6 dim: 2 dim: 3 }
+}
+operation {
+ type: "Reshape"
+ input: "ifm"
+ input: "shape1"
+ output: "reshape_out"
+}
+operation {
+ type: "Reshape"
+ input: "reshape_out"
+ input: "shape2"
+ output: "ofm"
+}
+
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check if Redundant Reshape is removed.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "RESHAPE_EXIST" $(op_count RESHAPE) '=' 1
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 1 dim: 1 }
+}
+operand {
+ name: "t1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operation {
+ type: "Squeeze"
+ squeeze_options { squeeze_dim: 3 }
+ input: "ifm"
+ output: "t1"
+}
+operation {
+ type: "Squeeze"
+ squeeze_options { squeeze_dim: 2 }
+ input: "t1"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check if Squeeze is substituted with Reshape op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "SQUEEZE_COUNT" $(op_count SQUEEZE) '=' 0
+RULE "RESHAPE_COUNT" $(op_count RESHAPE) '=' 2
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 4 }
+}
+operand {
+ name: "begin"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "0" arg: "0" arg: "0" }
+}
+operand {
+ name: "end"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "4" }
+}
+operand {
+ name: "strides"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "1" arg: "1" }
+}
+operand {
+ name: "output_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 4 }
+}
+operation {
+ type: "StridedSlice"
+ strided_slice_options {
+ begin_mask: 0
+ end_mask: 0
+ ellipsis_mask: 0
+ new_axis_mask: 0
+ shrink_axis_mask: 0
+ }
+ input: "ifm"
+ input: "begin"
+ input: "end"
+ input: "strides"
+ output: "output_1"
+}
+operand {
+ name: "begin_2"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "0" arg: "0" arg: "0" }
+}
+operand {
+ name: "end_2"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "0" arg: "1" arg: "0" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+  shape { dim: 1 dim: 4 }
+}
+operation {
+ type: "StridedSlice"
+ strided_slice_options {
+ begin_mask: 5
+ end_mask: 5
+ ellipsis_mask: 0
+ new_axis_mask: 0
+ shrink_axis_mask: 2
+ }
+ input: "output_1"
+ input: "begin_2"
+ input: "end_2"
+ input: "strides"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check if Unnecessary StridedSlice is removed.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "STRIDEDSLICE_EXIST" $(op_count STRIDEDSLICE) '=' 1
--- /dev/null
+# Tconv with asymmetric filter + BN + Relu6
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 2
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "conv2d_transpose/input_sizes"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "5"
+ arg: "1"
+ arg: "2"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "FusedBatchNormV3"
+ type: FLOAT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "-2.04724"
+ arg: "-7.80109"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "FusedBatchNormV3_mul_0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0_param"
+ type: FLOAT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "2.00834"
+ arg: "1.00344"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "Relu6"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/input_sizes"
+ input: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes"
+ input: "Hole"
+ output: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+}
+operation {
+ type: "Mul"
+ input: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ input: "FusedBatchNormV3_mul_0_param"
+ output: "FusedBatchNormV3_mul_0"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "FusedBatchNormV3_mul_0"
+ input: "FusedBatchNormV3"
+ output: "Relu6"
+ add_options {
+ activation: RELU6
+ }
+}
+input: "Hole"
+output: "Relu6"
--- /dev/null
+# To check if BatchNorm op (mul + add) is fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "RELU6_EXIST" $(op_count RELU6) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
--- /dev/null
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "add"
+}
+operation {
+ type: "Sqrt"
+ input: "add"
+ output: "ofm1"
+}
+operation {
+ type: "Sqrt"
+ input: "add"
+ output: "ofm2"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm1"
+output: "ofm2"
--- /dev/null
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "add"
+}
+operation {
+ type: "Sqrt"
+ input: "add"
+ output: "sqrt1"
+}
+operation {
+ type: "Sqrt"
+ input: "add"
+ output: "sqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt1"
+ output: "ofm1"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt2"
+ output: "ofm2"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm1"
+output: "ofm2"
--- /dev/null
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm4"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "add1"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "add1"
+ input: "ifm3"
+ output: "add2"
+}
+operation {
+ type: "Sub"
+ sub_options {
+ activation: NONE
+ }
+ input: "add2"
+ input: "ifm4"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+input: "ifm3"
+input: "ifm4"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "sqrt"
+ output: "sqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt2"
+ output: "rsqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt2"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "rsqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "rsqrt2"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt2"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "ofm1"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "ofm2"
+}
+input: "ifm"
+output: "ofm1"
+output: "ofm2"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "rsqrt2"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "sqrt"
+ input: "rsqrt2"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "rsqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt2"
+ output: "rsqrt3"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "sqrt"
+ input: "rsqrt3"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt4"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "rsqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "rsqrt3"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt2"
+ output: "rsqrt4"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "rsqrt3"
+ input: "rsqrt4"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm1"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "ifm2"
+ output: "sqrt"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "rsqrt"
+ input: "sqrt"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "rsqrt"
+ input: "sqrt"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 2 dim: 3 }
+}
+operand {
+ name: "begin"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "-1" arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "1" arg: "3"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+}
+operation {
+ type: "Slice"
+ input: "ifm"
+ input: "begin"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 5 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 4 dim: 5 }
+}
+operation {
+ type: "Squeeze"
+ squeeze_options { }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[28, 28, 3], name="Hole")
+
+op_uni_ = tf.compat.v1.keras.layers.LSTM(1, time_major=False, return_sequences=True)
+op_bidi_ = tf.compat.v1.keras.layers.Bidirectional(op_uni_)(in_)
--- /dev/null
+import tensorflow as tf
+import numpy as np
+
+tf.compat.v1.disable_eager_execution()
+
+in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole")
+
+filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32)
+strides = (1, 2, 2, 1)
+cv_ = tf.compat.v1.nn.conv2d(in_, filters, strides, "VALID", data_format="NHWC")
+
+op_ = tf.compat.v1.fake_quant_with_min_max_vars(cv_, 0.0, 1.0, 8, False)
+'''
+NOTE:
+'fake_quant_with_min_max_vars' is converted to QUANTIZE-DEQUANTIZE in tflite.
+To produce a tflite model with the FAKE_QUANT Op, you need to change tf2tfliteV2.py with
+
+converter.experimental_new_converter = False
+
+and then run
+
+python3 ../../compiler/tf2tfliteV2/tf2tfliteV2.py --v2 --graph_def \
+-i ./fake_quant_with_min_max_vars.pbtxt \
+-o ./fake_quant_with_min_max_vars.tflite \
+-I Hole \
+-O FakeQuantWithMinMaxVars
+'''
--- /dev/null
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 16, 160, 160), name="Hole")
+
+upper_ = tf.compat.v1.constant(6.)
+lower_ = tf.compat.v1.constant(0.)
+
+min_ = tf.compat.v1.minimum(in_, upper_)
+max_ = tf.compat.v1.maximum(min_, lower_)
+'''
+python ../../compiler/tf2tfliteV2/tf2tfliteV2.py --v1 \
+-i minimum-maximum.pbtxt \
+-o minimum-maximum.tflite \
+-I Hole -O Maximum
+'''
+#!/usr/bin/env python
+
# TensorFlow Python Example Manager
import tensorflow as tf
+++ /dev/null
-../../.clang-format.8
\ No newline at end of file
$(error ONERT_PREBUILT_LIB_DIR is not set)
endif
-# libcircle_loader
-include $(CLEAR_VARS)
-LOCAL_MODULE := circle_loader
-PREBUILT_LIB += circle_loader
-LOCAL_SRC_FILES := \
- $(ONERT_PREBUILT_LIB_DIR)/libcircle_loader.so
-include $(PREBUILT_SHARED_LIBRARY)
-
-# libtflite_loader
-include $(CLEAR_VARS)
-LOCAL_MODULE := tflite_loader
-PREBUILT_LIB += tflite_loader
-LOCAL_SRC_FILES := \
- $(ONERT_PREBUILT_LIB_DIR)/libtflite_loader.so
-include $(PREBUILT_SHARED_LIBRARY)
-
# libnnfw
include $(CLEAR_VARS)
LOCAL_MODULE := nnfw-dev
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.12.0"
+ versionName "1.15.0"
externalNativeBuild {
ndkBuild {
target_link_libraries(style_transfer_app onert_core onert tflite_loader)
target_link_libraries(style_transfer_app tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite)
target_link_libraries(style_transfer_app nnfw-dev)
-target_link_libraries(tflite_loader_test_tool ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
+target_link_libraries(tflite_comparator ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
if(JPEG_FOUND)
target_link_libraries(style_transfer_app ${JPEG_LIBRARIES})
endif(JPEG_FOUND)
+++ /dev/null
-../../.clang-format.8
\ No newline at end of file
sleep_time.tv_nsec = micros * 1e3;
nanosleep(&sleep_time, nullptr);
}
-}
+} // namespace
namespace benchmark
{
std::normal_distribution<float> _dist;
};
+template <> int8_t RandomGenerator::generate<int8_t>(void);
template <> uint8_t RandomGenerator::generate<uint8_t>(void);
template <> bool RandomGenerator::generate<bool>(void);
template <> int32_t RandomGenerator::generate<int32_t>(void);
namespace misc
{
+template <> int8_t RandomGenerator::generate<int8_t>(void)
+{
+ // The value of type_range is 255.
+ float type_range = static_cast<float>(std::numeric_limits<int8_t>::max()) -
+ static_cast<float>(std::numeric_limits<int8_t>::min());
+ // Most _dist values range from -5.0 to 5.0.
+ float min_range = -5.0f;
+ float max_range = 5.0f;
+  // NOTE shifted_relative_val follows a Gaussian distribution whose original mean was 0 and
+  // standard deviation was 2. Its values are then scaled and shifted so that the mean becomes
+  // 127.5 and the range is about [0, 255].
+ float shifted_relative_val = (_dist(_rand) - min_range) * type_range / (max_range - min_range);
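+  // For example, a _dist sample of 0.0 maps to (0.0 - (-5.0)) * 255 / 10 = 127.5, and a sample
+  // of -5.0 maps to 0.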
+
+  // If shifted_relative_val falls outside the range, it is clamped to the corresponding end
+  // point.
+ if (shifted_relative_val < -128.0f)
+ {
+ return -128;
+ }
+ else if (shifted_relative_val > type_range)
+ {
+ return 127;
+ }
+
+ // Convert shifted_relative_val from float to int8
+ return static_cast<int8_t>(shifted_relative_val);
+}
+
template <> uint8_t RandomGenerator::generate<uint8_t>(void)
{
// The value of type_range is 255.
set_property(TARGET nnfw_lib_profiling PROPERTY POSITION_INDEPENDENT_CODE ON)
target_include_directories(nnfw_lib_profiling PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(nnfw_lib_profiling PRIVATE nnfw_common)
-target_link_libraries(nnfw_lib_profiling PRIVATE nnfw_coverage)
target_link_libraries(nnfw_lib_rua_anchor PUBLIC nnfw_lib_rua_core)
target_link_libraries(nnfw_lib_rua_anchor PRIVATE nnfw_lib_rua_dyn)
target_link_libraries(nnfw_lib_rua_anchor PRIVATE nnfw_common)
-target_link_libraries(nnfw_lib_rua_anchor PRIVATE nnfw_coverage)
target_include_directories(nnfw_lib_rua_dyn PUBLIC include)
target_link_libraries(nnfw_lib_rua_dyn PUBLIC nnfw_lib_rua_core)
target_link_libraries(nnfw_lib_rua_dyn PRIVATE nnfw_common)
-target_link_libraries(nnfw_lib_rua_dyn PRIVATE nnfw_coverage)
target_link_libraries(nnfw_lib_tflite PUBLIC nnfw_lib_misc)
target_link_libraries(nnfw_lib_tflite PRIVATE ${LIB_PTHREAD} dl)
target_link_libraries(nnfw_lib_tflite PRIVATE nnfw_common)
-target_link_libraries(nnfw_lib_tflite PRIVATE nnfw_coverage)
if(NOT ENABLE_TEST)
return()
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_TFLITE_COPY_INPUT_INITIALIZER_H__
+#define __NNFW_TFLITE_COPY_INPUT_INITIALIZER_H__
+
+#include <tensorflow/lite/interpreter.h>
+
+namespace nnfw
+{
+namespace tflite
+{
+
+class CopyInputInitializer
+{
+public:
+ CopyInputInitializer(::tflite::Interpreter &from) : _from{from}
+ {
+ // DO NOTHING
+ }
+
+ void run(::tflite::Interpreter &interp);
+
+private:
+ template <typename T> void setValue(::tflite::Interpreter &interp, int tensor_idx);
+
+private:
+ ::tflite::Interpreter &_from;
+};
+
+} // namespace tflite
+} // namespace nnfw
+
+#endif // __NNFW_TFLITE_COPY_INPUT_INITIALIZER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_TFLITE_OUTPUT_RESETTER_H__
+#define __NNFW_TFLITE_OUTPUT_RESETTER_H__
+
+#include <tensorflow/lite/interpreter.h>
+
+namespace nnfw
+{
+namespace tflite
+{
+
+class OutputResetter
+{
+public:
+ OutputResetter()
+ {
+ // DO NOTHING
+ }
+
+ void run(::tflite::Interpreter &interp);
+
+private:
+ template <typename T> void resetValue(::tflite::Interpreter &interp, int tensor_idx);
+};
+
+} // namespace tflite
+} // namespace nnfw
+
+#endif // __NNFW_TFLITE_OUTPUT_RESETTER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_TFLITE_RANDOM_INPUT_INITIALIZER_H__
+#define __NNFW_TFLITE_RANDOM_INPUT_INITIALIZER_H__
+
+#include <misc/RandomGenerator.h>
+
+#include <tensorflow/lite/interpreter.h>
+
+namespace nnfw
+{
+namespace tflite
+{
+
+class RandomInputInitializer
+{
+public:
+ RandomInputInitializer(misc::RandomGenerator &randgen) : _randgen{randgen}
+ {
+ // DO NOTHING
+ }
+
+ void run(::tflite::Interpreter &interp);
+
+private:
+ template <typename T> void setValue(::tflite::Interpreter &interp, int tensor_idx);
+
+private:
+ nnfw::misc::RandomGenerator &_randgen;
+};
+
+} // namespace tflite
+} // namespace nnfw
+
+#endif // __NNFW_TFLITE_RANDOM_INPUT_INITIALIZER_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorShapeUtils.h
- * @brief This file contains utilities function of tensor shape
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__
-#define __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__
-
-#include "misc/tensor/Shape.h"
-
-#include <vector>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Converts tensor::Shape into a vector
- * @param[in] shape The tensor shape to be converted
- * @return vector value of given shape object
- */
-static inline std::vector<int32_t> as_dims(const nnfw::misc::tensor::Shape &shape)
-{
- std::vector<int32_t> dims;
-
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
- {
- dims.emplace_back(shape.dim(axis));
- }
-
- return dims;
-}
-
-/**
- * @brief Broadcasts between two given shapes
- * @param[in] lhs_shape The left hand side shape
- * @param[in] rhs_shape The right hand side shape
- * @return The broadcasted shape
- */
-nnfw::misc::tensor::Shape broadcast(const nnfw::misc::tensor::Shape &lhs_shape,
- const nnfw::misc::tensor::Shape &rhs_shape);
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/CopyInputInitializer.h"
+#include "tflite/TensorView.h"
+
+#include <misc/tensor/IndexIterator.h>
+
+namespace nnfw
+{
+namespace tflite
+{
+
+void CopyInputInitializer::run(::tflite::Interpreter &interp)
+{
+ for (const auto &tensor_idx : interp.inputs())
+ {
+ TfLiteTensor *tensor = interp.tensor(tensor_idx);
+ switch (tensor->type)
+ {
+ case kTfLiteInt32:
+ setValue<int32_t>(interp, tensor_idx);
+ break;
+ case kTfLiteUInt8:
+ setValue<uint8_t>(interp, tensor_idx);
+ break;
+ case kTfLiteInt8:
+ setValue<int8_t>(interp, tensor_idx);
+ break;
+ case kTfLiteBool:
+ setValue<bool>(interp, tensor_idx);
+ break;
+ case kTfLiteFloat32:
+ setValue<float>(interp, tensor_idx);
+ break;
+ default:
+ throw std::runtime_error{"Not supported input type"};
+ }
+ }
+}
+
+template <typename T>
+void CopyInputInitializer::setValue(::tflite::Interpreter &interp, int tensor_idx)
+{
+ auto tensor_from_view = nnfw::tflite::TensorView<T>::make(_from, tensor_idx);
+ auto tensor_to_view = nnfw::tflite::TensorView<T>::make(interp, tensor_idx);
+
+ nnfw::misc::tensor::iterate(tensor_from_view.shape())
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ tensor_to_view.at(ind) = tensor_from_view.at(ind);
+ };
+}
+
+} // namespace tflite
+} // namespace nnfw
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/OutputResetter.h"
+#include "tflite/TensorView.h"
+
+#include <misc/tensor/IndexIterator.h>
+
+namespace nnfw
+{
+namespace tflite
+{
+
+void OutputResetter::run(::tflite::Interpreter &interp)
+{
+ for (const auto &tensor_idx : interp.outputs())
+ {
+ TfLiteTensor *tensor = interp.tensor(tensor_idx);
+ switch (tensor->type)
+ {
+ case kTfLiteInt32:
+ resetValue<int32_t>(interp, tensor_idx);
+ break;
+ case kTfLiteUInt8:
+ resetValue<uint8_t>(interp, tensor_idx);
+ break;
+ case kTfLiteInt8:
+ resetValue<int8_t>(interp, tensor_idx);
+ break;
+ case kTfLiteBool:
+ resetValue<bool>(interp, tensor_idx);
+ break;
+ case kTfLiteFloat32:
+ resetValue<float>(interp, tensor_idx);
+ break;
+ default:
+ throw std::runtime_error{"Not supported output type"};
+ }
+ }
+}
+
+template <typename T> void OutputResetter::resetValue(::tflite::Interpreter &interp, int tensor_idx)
+{
+ auto tensor_view = nnfw::tflite::TensorView<T>::make(interp, tensor_idx);
+
+ nnfw::misc::tensor::iterate(tensor_view.shape())
+ << [&](const nnfw::misc::tensor::Index &ind) { tensor_view.at(ind) = 0; };
+}
+
+} // namespace tflite
+} // namespace nnfw
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/RandomInputInitializer.h"
+#include "tflite/TensorView.h"
+
+#include <misc/tensor/IndexIterator.h>
+
+namespace nnfw
+{
+namespace tflite
+{
+
+void RandomInputInitializer::run(::tflite::Interpreter &interp)
+{
+ for (const auto &tensor_idx : interp.inputs())
+ {
+ TfLiteTensor *tensor = interp.tensor(tensor_idx);
+ switch (tensor->type)
+ {
+ case kTfLiteFloat32:
+ setValue<float>(interp, tensor_idx);
+ break;
+ case kTfLiteInt32:
+ setValue<int32_t>(interp, tensor_idx);
+ break;
+ case kTfLiteUInt8:
+ setValue<uint8_t>(interp, tensor_idx);
+ break;
+ case kTfLiteBool:
+ setValue<bool>(interp, tensor_idx);
+ break;
+ case kTfLiteInt8:
+ setValue<int8_t>(interp, tensor_idx);
+ break;
+ default:
+ throw std::runtime_error{"Not supported input type"};
+ }
+ }
+}
+
+template <typename T>
+void RandomInputInitializer::setValue(::tflite::Interpreter &interp, int tensor_idx)
+{
+ auto tensor_view = nnfw::tflite::TensorView<T>::make(interp, tensor_idx);
+
+ nnfw::misc::tensor::iterate(tensor_view.shape())
+ << [&](const nnfw::misc::tensor::Index &ind) { tensor_view.at(ind) = _randgen.generate<T>(); };
+}
+
+} // namespace tflite
+} // namespace nnfw
* limitations under the License.
*/
+#include "tflite/CopyInputInitializer.h"
+#include "tflite/OutputResetter.h"
+#include "tflite/RandomInputInitializer.h"
#include "tflite/RandomTestRunner.h"
#include "tflite/Diff.h"
#include "tflite/TensorLogger.h"
_nnapi = builder.build();
_tfl_interp->UseNNAPI(false);
+ _nnapi->UseNNAPI(true);
// Allocate Tensors
_tfl_interp->AllocateTensors();
_nnapi->AllocateTensors();
+}
+int RandomTestRunner::run(size_t running_count)
+{
assert(_tfl_interp->inputs() == _nnapi->inputs());
+ assert(_tfl_interp->outputs() == _nnapi->outputs());
- using ::tflite::Interpreter;
- using Initializer = std::function<void(int id, Interpreter *, Interpreter *)>;
-
- std::map<TfLiteType, Initializer> initializers;
- std::map<TfLiteType, Initializer> reseters;
-
- // Generate singed 32-bit integer (s32) input
- initializers[kTfLiteInt32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteInt32);
- assert(_nnapi->tensor(id)->type == kTfLiteInt32);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<int32_t>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<int32_t>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- int32_t value = 0;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- ++value;
- };
- };
-
- // Generate singed 32-bit integer (s32) input
- reseters[kTfLiteInt32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteInt32);
- assert(_nnapi->tensor(id)->type == kTfLiteInt32);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<int32_t>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<int32_t>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- int32_t value = 0;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- initializers[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteUInt8);
- assert(_nnapi->tensor(id)->type == kTfLiteUInt8);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<uint8_t>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<uint8_t>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<uint8_t>);
- const nnfw::misc::tensor::Object<uint8_t> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
- assert(tfl_interp_view.shape() == data.shape());
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
-
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- reseters[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteUInt8);
- assert(_nnapi->tensor(id)->type == kTfLiteUInt8);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<uint8_t>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<uint8_t>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<uint8_t>);
- const nnfw::misc::tensor::Object<uint8_t> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
- assert(tfl_interp_view.shape() == data.shape());
-
- uint8_t value = 0;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- initializers[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteFloat32);
- assert(_nnapi->tensor(id)->type == kTfLiteFloat32);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<float>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<float>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<float (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<float>);
- const nnfw::misc::tensor::Object<float> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
-
- assert(tfl_interp_view.shape() == data.shape());
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
-
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- reseters[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteFloat32);
- assert(_nnapi->tensor(id)->type == kTfLiteFloat32);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<float>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<float>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<float (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<float>);
- const nnfw::misc::tensor::Object<float> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
+ nnfw::tflite::OutputResetter resetter;
+ resetter.run(*(_tfl_interp.get()));
- assert(tfl_interp_view.shape() == data.shape());
+ RandomInputInitializer initializer{_randgen};
+ initializer.run(*(_tfl_interp.get()));
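+  // At this point the T/F Lite interpreter has zeroed outputs and randomized inputs; inside the
+  // loop below the NNAPI interpreter is reset the same way and its inputs are copied from the
+  // T/F Lite interpreter.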
- float value = 0;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- initializers[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteBool);
- assert(_nnapi->tensor(id)->type == kTfLiteBool);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<bool>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<bool>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
- const nnfw::misc::tensor::Object<bool> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
-
- assert(tfl_interp_view.shape() == data.shape());
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
-
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- reseters[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteBool);
- assert(_nnapi->tensor(id)->type == kTfLiteBool);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<bool>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<bool>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
- const nnfw::misc::tensor::Object<bool> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
-
- assert(tfl_interp_view.shape() == data.shape());
-
- bool value = false;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- // Fill IFM with random numbers
- for (const auto id : _tfl_interp->inputs())
- {
- assert(_tfl_interp->tensor(id)->type == _nnapi->tensor(id)->type);
-
- auto it = initializers.find(_tfl_interp->tensor(id)->type);
-
- if (it == initializers.end())
- {
- throw std::runtime_error{"Not supported input type"};
- }
-
- it->second(id, _tfl_interp.get(), _nnapi.get());
- }
-
- // Fill OFM with 0
- for (const auto id : _tfl_interp->outputs())
- {
- assert(_tfl_interp->tensor(id)->type == _nnapi->tensor(id)->type);
-
- auto it = reseters.find(_tfl_interp->tensor(id)->type);
-
- if (it == reseters.end())
- {
- throw std::runtime_error{"Not supported input type"};
- }
-
- it->second(id, _tfl_interp.get(), _nnapi.get());
- }
-}
-
-int RandomTestRunner::run(size_t running_count)
-{
std::cout << "[NNAPI TEST] Run T/F Lite Interpreter without NNAPI" << std::endl;
_tfl_interp->Invoke();
for (size_t i = 1; i <= running_count; ++i)
{
+ resetter.run(*(_nnapi.get()));
+
+ CopyInputInitializer copy_initializer{*(_tfl_interp.get())};
+ copy_initializer.run(*(_nnapi.get()));
+
std::cout << "[NNAPI TEST #" << i << "] Run T/F Lite Interpreter with NNAPI" << std::endl;
char *env = getenv("UPSTREAM_DELEGATE");
if (env && !std::string(env).compare("1"))
{
- _nnapi->UseNNAPI(true);
_nnapi->Invoke();
}
else
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the License);
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/TensorShapeUtils.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-nnfw::misc::tensor::Shape broadcast(const nnfw::misc::tensor::Shape &lhs_shape,
- const nnfw::misc::tensor::Shape &rhs_shape)
-{
- const uint32_t lhs_rank = lhs_shape.rank();
- const uint32_t rhs_rank = rhs_shape.rank();
- const uint32_t out_rank = std::max(lhs_rank, rhs_rank);
- const uint32_t lhs_rank_diff = out_rank - lhs_rank;
- const uint32_t rhs_rank_diff = out_rank - rhs_rank;
-
- nnfw::misc::tensor::Shape out_shape(out_rank);
-
- for (uint32_t axis = 0; axis < out_rank; ++axis)
- {
- out_shape.dim(axis) = std::max(axis < lhs_rank_diff ? 1 : lhs_shape.dim(axis - lhs_rank_diff),
- axis < rhs_rank_diff ? 1 : rhs_shape.dim(axis - rhs_rank_diff));
- }
-
- return out_shape;
-}
-
-} // namespace tflite
-} // namespace nnfw
/**
* @brief Extended operation types
*/
-typedef enum {
+typedef enum
+{
/** extends operation. */
/**
+++ /dev/null
-../../../.clang-format.8
\ No newline at end of file
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01000c00
+#define NNFW_VERSION 0x01000f00
#endif // __NNFW_VERSION_H__
namespace onert
{
-namespace frontend
-{
-namespace custom
+namespace api
{
using namespace backend::custom;
}
};
-Kernel::Kernel(const nnfw_custom_eval evalFunction)
+CustomKernel::CustomKernel(const nnfw_custom_eval evalFunction)
: _in_params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction)
{
}
-void Kernel::configure(CustomKernelConfigParams &&inParams)
+void CustomKernel::configure(CustomKernelConfigParams &&inParams)
{
_userdata = inParams.userdata;
_userdata_size = inParams.userdata_size;
_in_params = std::move(inParams);
}
-void Kernel::run()
+void CustomKernel::run()
{
nnfw_custom_kernel_params params;
delete[] params.outputs;
}
-} // namespace custom
-} // namespace frontend
+} // namespace api
} // namespace onert
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CUSTOM_KERNEL_H__
-#define __ONERT_BACKEND_CUSTOM_KERNEL_H__
+#ifndef __ONERT_API_CUSTOM_KERNEL_H__
+#define __ONERT_API_CUSTOM_KERNEL_H__
#include "nnfw_experimental.h"
namespace onert
{
-namespace frontend
-{
-namespace custom
+namespace api
{
-class Kernel : public ::onert::exec::IFunction
+class CustomKernel : public ::onert::exec::IFunction
{
public:
- explicit Kernel(nnfw_custom_eval evalFunction);
+ explicit CustomKernel(nnfw_custom_eval evalFunction);
backend::custom::CustomKernelConfigParams _in_params;
void run() override;
};
-} // namespace custom
-} // namespace frontend
+} // namespace api
} // namespace onert
-#endif // __ONERT_BACKEND_CUSTOM_KERNEL_H__
+#endif // __ONERT_API_CUSTOM_KERNEL_H__
namespace onert
{
-namespace frontend
+namespace api
{
-namespace custom
+
+class KernelBuilder : public backend::custom::IKernelBuilder
{
+public:
+ KernelBuilder(CustomKernelRegistry *registry) : _registry(registry) {}
+
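+  // Builds a kernel by looking up the eval function registered for the given op id and
+  // configuring it with the given params.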
+ std::unique_ptr<exec::IFunction>
+ buildKernel(const std::string &id,
+ backend::custom::CustomKernelConfigParams &¶ms) const override
+ {
+ auto kernel = _registry->buildKernelForOp(id);
+ kernel->configure(std::move(params));
+
+ return kernel;
+ }
+
+private:
+ CustomKernelRegistry *_registry;
+};
-void KernelRegistry::registerKernel(const std::string &id, nnfw_custom_eval evalFunction)
+void CustomKernelRegistry::registerKernel(const std::string &id, nnfw_custom_eval evalFunction)
{
_storage.emplace(id, evalFunction);
}
-std::shared_ptr<backend::custom::IKernelBuilder> KernelRegistry::getBuilder()
+std::shared_ptr<backend::custom::IKernelBuilder> CustomKernelRegistry::getBuilder()
{
return std::make_unique<KernelBuilder>(this);
}
-std::unique_ptr<Kernel> KernelRegistry::buildKernelForOp(const std::string &id)
+std::unique_ptr<CustomKernel> CustomKernelRegistry::buildKernelForOp(const std::string &id)
{
auto it = _storage.find(id);
if (it == _storage.end())
throw std::runtime_error("Unable to find associated kernel for op");
}
- return std::make_unique<Kernel>(it->second);
+ return std::make_unique<CustomKernel>(it->second);
}
-// Kernel builder
-std::unique_ptr<exec::IFunction>
-KernelBuilder::buildKernel(const std::string &id,
- backend::custom::CustomKernelConfigParams &¶ms) const
-{
- auto kernel = _registry->buildKernelForOp(id);
- kernel->configure(std::move(params));
-
- return kernel;
-}
-
-KernelBuilder::KernelBuilder(KernelRegistry *registry) : _registry(registry) {}
-
-} // namespace custom
-} // namespace frontend
+} // namespace api
} // namespace onert
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
-#define __ONERT_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
+#ifndef __ONERT_API_CUSTOM_KERNEL_REGISTRY_H__
+#define __ONERT_API_CUSTOM_KERNEL_REGISTRY_H__
#include "CustomKernel.h"
namespace onert
{
-namespace frontend
-{
-namespace custom
+namespace api
{
-class KernelRegistry
+class CustomKernelRegistry
{
public:
void registerKernel(const std::string &id, nnfw_custom_eval evalFunction);
std::shared_ptr<backend::custom::IKernelBuilder> getBuilder();
- std::unique_ptr<Kernel> buildKernelForOp(const std::string &id);
+ std::unique_ptr<CustomKernel> buildKernelForOp(const std::string &id);
private:
std::unordered_map<std::string, nnfw_custom_eval> _storage;
};
-class KernelBuilder : public backend::custom::IKernelBuilder
-{
-public:
- KernelBuilder(KernelRegistry *registry);
-
- std::unique_ptr<exec::IFunction>
- buildKernel(const std::string &id,
- backend::custom::CustomKernelConfigParams &¶ms) const override;
-
-private:
- KernelRegistry *_registry;
-};
-
-} // namespace custom
-} // namespace frontend
+} // namespace api
} // namespace onert
-#endif // __ONERT_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
+#endif // __ONERT_API_CUSTOM_KERNEL_REGISTRY_H__
nnfw_session::nnfw_session()
: _subgraphs{nullptr}, _execution{nullptr},
- _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}, _tracing_ctx{
- nullptr}
+ _kernel_registry{std::make_shared<onert::api::CustomKernelRegistry>()}, _tracing_ctx{nullptr}
{
// DO NOTHING
}
try
{
- std::string manifest_file_name(package_dir);
- manifest_file_name += "/metadata/MANIFEST";
+ std::string package_path(package_dir);
+ std::string manifest_file_name = package_path + "/metadata/MANIFEST";
std::ifstream mfs(manifest_file_name);
// extract the filename of the first(index 0) model
if (!configs.empty() && !configs[0].empty())
{
- auto filepath = package_dir + std::string("/metadata/") + configs[0].asCString();
+ auto filepath = package_path + std::string("/metadata/") + configs[0].asString();
CfgKeyValues keyValues;
if (loadConfigure(filepath, keyValues))
}
}
- auto model_file_path = package_dir + std::string("/") + models[0].asString(); // first model
+ auto model_file_path = package_path + std::string("/") + models[0].asString(); // first model
auto model_type = model_types[0].asString(); // first model's type
if (model_type == "tflite")
{
- _subgraphs = onert::tflite_loader::loadModel(model_file_path.c_str());
+ _subgraphs = onert::tflite_loader::loadModel(model_file_path);
}
else if (model_type == "circle")
{
- _subgraphs = onert::circle_loader::loadModel(model_file_path.c_str());
+ _subgraphs = onert::circle_loader::loadModel(model_file_path);
}
else
{
return NNFW_STATUS_INVALID_STATE;
}
- if (!_subgraphs || !primary_subgraph() || primary_subgraph()->isBuildingPhase())
- {
- std::cerr << "Error during model prepare : "
- << "prepare should be run after load_model" << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
try
{
_subgraphs.reset();
{
// In this case, if we apply input shape in primary_subgraph, it will propagate after
    // compilation and execution
- auto ind = primary_subgraph()->getInputs().at(index);
- auto &input = primary_subgraph()->operands().at(ind);
+ auto primary_subgraph = _subgraphs->primary();
+ auto ind = primary_subgraph->getInputs().at(index);
+ auto &input = primary_subgraph->operands().at(ind);
// overwrite input shape with the shape from ti
input.info().shape(new_shape);
{
options.graph_dump_level = toInt(value);
}
- else if (skey == config::OP_SEQ_MAX_NODE)
- {
- options.op_seq_max_node = toInt(value);
- }
else if (skey == config::EXECUTOR)
{
options.executor = value;
return NNFW_STATUS_NO_ERROR;
}
-onert::ir::Graph *nnfw_session::primary_subgraph()
+const onert::ir::Graph *nnfw_session::primary_subgraph()
{
if (_subgraphs)
{
assert(_execution);
// TODO Remove const_cast
// We assumed the graph will not change after compilation, but shape could change
- return const_cast<onert::ir::Graph *>(&_execution->primary_subgraph());
+ return &_execution->primary_subgraph();
}
}
assert(_subgraphs);
assert(_compiler);
assert(!_execution);
- assert(!primary_subgraph()->isBuildingPhase());
return true;
}
else
assert(!_subgraphs);
assert(_compiler);
assert(_execution);
- assert(!primary_subgraph()->isBuildingPhase());
return true;
}
else
assert(!_subgraphs);
assert(_compiler);
assert(_execution);
- assert(!primary_subgraph()->isBuildingPhase());
return true;
}
return false;
assert(!_subgraphs);
assert(_compiler);
assert(_execution);
- assert(!primary_subgraph()->isBuildingPhase());
return true;
}
else
namespace onert
{
-namespace frontend
+namespace api
{
-namespace custom
-{
-class KernelRegistry;
-}
-} // namespace frontend
+class CustomKernelRegistry;
+} // namespace api
namespace exec
{
class Execution;
NNFW_STATUS output_tensorindex(const char *tensorname, uint32_t *index);
private:
- onert::ir::Graph *primary_subgraph();
+ const onert::ir::Graph *primary_subgraph();
bool isStateInitialized();
bool isStateModelLoaded();
bool isStatePrepared();
std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
std::unique_ptr<onert::compiler::Compiler> _compiler;
std::unique_ptr<onert::exec::Execution> _execution;
- std::shared_ptr<onert::frontend::custom::KernelRegistry> _kernel_registry;
+ std::shared_ptr<onert::api::CustomKernelRegistry> _kernel_registry;
std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
};
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<backend::BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
- bool is_linear_executor) const override
+ std::unique_ptr<backend::BackendContext> newContext(ContextData &&data) const override
{
- const auto &operands = graph.operands();
- const auto &operations = graph.operations();
- auto context = std::make_unique<acl_cl::BackendContext>(this, &graph);
- auto tm = createTensorManager(is_linear_executor);
+ const auto &graph = *data.graph;
+ const auto &operands = data.graph->operands();
+    // Create the tensor manager before data is moved into the backend context
+    auto tm = createTensorManager(data.is_linear_executor);
+    auto context = std::make_unique<acl_cl::BackendContext>(this, std::move(data));
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr);
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
}
void BackendContext::initConsts()
{
- for (auto &op : operation_list())
- {
- constant_initializer->setLayout(op.layout);
- graph()->operations().at(op.index).accept(*constant_initializer);
- }
+ _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
+ constant_initializer->setLayout(graph()->layout());
+ op.accept(*constant_initializer);
+ });
- for (auto ind : operand_list())
- {
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (_data.external_operands.contains(ind) || !operand.isConstant())
+ return;
const auto &obj = graph()->operands().at(ind);
if (obj.isConstant() && !constant_initializer->exist(ind))
{
constant_initializer->registerDefaultInitializer(ind, obj);
}
- }
+ });
constant_initializer->run();
}
-void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+void BackendContext::planTensors()
{
ir::OperandIndexMap<uint32_t> uses_map;
ir::OperandIndexMap<uint32_t> def_map;
ir::OperandIndexSequence constants;
// Prepare scanning
- for (auto ind : operand_list())
- {
- const auto &obj = graph()->operands().at(ind);
- const auto &li = lower_info.operand.at(ind);
- if (li->def_factors().getOnlyElement().backend() != backend())
- continue;
-
- // Ignore unused tensor
- if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
- {
- VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process."
- << std::endl;
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (_data.external_operands.contains(ind))
return;
- }
uses_map[ind] = obj.getUses().size();
def_map[ind] = obj.getDef().valid() ? 1 : 0;
if (obj.isConstant())
constants.append(ind);
- auto factor = li->def_factors().getOnlyElement();
if (!tensor_builder->isRegistered(ind))
{
- // These tensors do not exist in any op_seq (No use and def)
+ // These tensors do not exist in any operation (No use and def)
const auto info = obj.info();
- const auto backend_layout = factor.layout();
+ const auto layout = _data.operand_layouts.at(ind);
// TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, backend_layout);
+ tensor_builder->registerTensorInfo(ind, info, layout);
}
- }
+ });
// Start scanning to do notify{First|Last}Use for each tensor
// 1. Scan DEF of outputs. If the DEF, allocate it
// 2. Scan DEF of inputs. If variable tensor, allocate it
// 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- for (const auto op_seq_ind : order)
+ for (const auto op_ind : _data.op_order)
{
- const auto &op_seq = op_seqs.at(op_seq_ind);
- for (const auto &op_idx : op_seq.operations())
- {
- auto &op = graph()->operations().at(op_idx);
- auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
- auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ const auto &op = graph()->operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
- // Define outputs
- for (const auto &ind : op_outputs)
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
{
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(def_map.find(ind) != def_map.end());
- if (def_map[ind])
- {
- def_map[ind] = 0;
- tensor_builder->notifyFirstUse(ind);
- }
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
}
+ }
- // Scan variable tensors
- // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
- // non-constant because of less memory usage by memory planning in here
- for (const auto &ind : op_inputs)
+ // Scan variable tensors
+ // These tensors behave like constants, but OperandInfo and LowerInfo treat them as
+ // non-constant so that the memory planning here uses less memory
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph()->operands().at(ind);
+ if (operand.info().isVariable())
{
- if (!tensor_builder->isRegistered(ind))
- continue;
- const auto &operand = graph()->operands().at(ind);
- if (operand.info().isVariable())
- {
- // The variable tensor with buffer is not supported yet
- assert(operand.data() == nullptr);
- assert(operand.getUses().size() == 1 && !operand.getDef().valid());
- assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
- lower_info.operand.at(ind)->use_factors().size() == 1);
- assert(uses_map[ind] == 1 && def_map[ind] == 0);
- tensor_builder->notifyFirstUse(ind);
- }
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
}
+ }
- for (const auto &ind : op_inputs)
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
{
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(uses_map.find(ind) != uses_map.end());
- assert(uses_map[ind] > 0);
- uses_map[ind]--;
- if (uses_map[ind] == 0)
- {
- // plan for deallocation of static tensornode
- tensor_builder->notifyLastUse(ind);
- }
+ // plan for deallocation of static tensor node
+ tensor_builder->notifyLastUse(ind);
}
}
}
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (uses_map[ind] == 0)
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ });
+
// Dispose and validate
for (const auto &ind : constants)
{
}
assert(
- std::all_of(uses_map.begin(), uses_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
assert(
- std::all_of(def_map.begin(), def_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
}
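// [Editor's note] Worked example of the counting above, for illustration: a tensor T produced by
// op A and consumed by ops B and C starts with def_map[T] = 1 and uses_map[T] = 2. Visiting A
// clears def_map[T] and calls notifyFirstUse(T), making the buffer live; B and C each decrement
// uses_map[T], and the decrement that reaches 0 calls notifyLastUse(T) so the static planner can
// reuse T's region afterwards. The final iterate() sweeps operands whose use count is still 0
// (never consumed inside the loop) so they are marked for deallocation as well.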
-ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info)
+ITensorRegistry *BackendContext::genTensors()
{
optimizer->optimize();
- for (const auto op_seq_ind : order)
- {
- const auto &op_seq = op_seqs.at(op_seq_ind);
- auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
- ir::Remove::DUPLICATED;
- for (const auto op_ind : op_seq)
- {
- bool op_assigned = [&]() {
- for (auto &op_info : operation_list())
- if (op_info.index == op_ind)
- return true;
- return false;
- }();
- if (!op_assigned)
- continue;
+ graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (external_operands().contains(ind))
+ return;
- const auto &op = graph()->operations().at(op_ind);
- for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED)
- {
- if (!tensor_builder->isRegistered(index) && !model_io.contains(index) &&
- find(operand_list().begin(), operand_list().end(), index) != operand_list().end())
- {
- const auto &operand_lower_info =
- lower_info.operand.at(index)->def_factors().getOnlyElement();
-
- // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
- // op.getOutputs() of permute (CPU) returns tensor A
- // but tensor A belongs to the backend of acl_cl.
- // So, we have to make this tensor NOT registered for CPU.
- if (operand_lower_info.backend() != backend())
- continue;
-
- const auto &obj = graph()->operands().at(index);
- const auto frontend_layout = op_seq.getLayout();
- const auto backend_layout = operand_lower_info.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
- }
- }
- }
+ const auto frontend_layout = graph()->layout();
+ const auto backend_layout = operand_layouts().at(ind);
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(ind, backend_info, backend_layout);
+ });
// TODO Get compiler options from the compiler and use them rather than reading them from Env
if (util::getConfigString(util::config::EXECUTOR) == "Linear")
{
- planTensors(order, op_seqs, lower_info);
+ planTensors();
}
else
{
// For executors that do not have a fixed linear execution order:
// as a workaround, use the static memory planner but never deallocate any tensor
- for (auto ind : operand_list())
- {
+ graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
if (tensor_builder->isRegistered(ind))
tensor_builder->notifyFirstUse(ind);
- }
+ });
}
tensor_builder->prepare();
return tensor_registry.get();
}
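// [Editor's note] Aside on the branch above: only the "Linear" executor has a fixed operation
// order known at compile time, so only then can planTensors() precompute first/last uses. For the
// other executors (dataflow/parallel), every registered tensor is marked as in use up front and
// never released, trading higher memory usage for correctness under a non-deterministic order.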
-FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs)
+FunctionMap BackendContext::genKernels()
{
FunctionMap ret;
- for (auto op_seq_ind : order)
+ for (auto op_ind : _data.op_order)
{
- const auto &op_seq = op_seqs.at(op_seq_ind);
- bool assigned = [&]() {
- for (auto op_info : operation_list())
- if (op_seq.exist(op_info.index))
- return true;
- return false;
- }();
- if (!assigned)
- continue;
- auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
- ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
}
tensor_builder->allocate();
initConsts();
// NOTE For memory optimization, we want to free some operand data
- for (auto ind : operand_list())
- {
- // TODO Remove const_cast
- auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
- obj.releaseData();
- }
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
for (auto &it : ret)
{
class BackendContext : public onert::backend::BackendContext
{
public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
+ BackendContext(const Backend *backend, ContextData &&data,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{
+ kernel_gen}
{
}
- ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info) override;
- FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs) override;
+ ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
private:
void initConsts();
- void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+ void planTensors();
public:
std::shared_ptr<TensorBuilder> tensor_builder;
const cl_event *event_wait_list, cl_event *usr_event) {
cl_event event;
cl_int enqueue_res =
- this->_origin_enqueue_function(command_queue, kernel, work_dim, gwo, gws, lws,
- num_events_in_wait_list, event_wait_list, &event);
+ this->_origin_enqueue_function(command_queue, kernel, work_dim, gwo, gws, lws,
+ num_events_in_wait_list, event_wait_list, &event);
this->_measured_events.emplace_back(event);
// According to the spec, if NULL was provided in usr_event, an event shouldn't be returned
if ((props & CL_QUEUE_PROFILING_ENABLE) == 0)
{
cl_scheduler.set_queue(
- cl::CommandQueue(cl_scheduler.context(), props | CL_QUEUE_PROFILING_ENABLE));
+ cl::CommandQueue(cl_scheduler.context(), props | CL_QUEUE_PROFILING_ENABLE));
}
};
// NOTE CLKernelLibraryEx must use the same context as CLScheduler
// It does not check whether another device is available.
arm_compute::CLKernelLibraryEx::get().init(
- "./cl_kernels/", arm_compute::CLScheduler::get().context(), cl::Device::getDefault());
+ "./cl_kernels/", arm_compute::CLScheduler::get().context(), cl::Device::getDefault());
return true;
}
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : acl_common::AclConstantInitializer{operands, tensor_reg}
+ : acl_common::AclConstantInitializer{operands, tensor_reg}
{
// DO NOTHING
}
const auto &shape = model_obj.shape();
const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
assert(model_obj.shape().rank() == 2);
- assert(obj.dimension(0) == 2);
+ assert(obj.getShape().dim(0) == 2);
obj.access([&](ITensor &tensor) {
for (auto i = 0; i < shape.dim(0); ++i)
{
{
const int32_t value = base[i * 2 + j];
int32_t *into = reinterpret_cast<int32_t *>(
- tensor.buffer() + tensor.calcOffset({shape.dim(0) - i - 1, j}));
+ tensor.buffer() + tensor.calcOffset({shape.dim(0) - i - 1, j}));
*into = value;
}
}
}
auto axis =
- acl_common::ToARMComputeAxis(ifm_rank, axis_tmp, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(ifm_rank, axis_tmp, frontend_layout, backend_layout).value();
obj.access([&](ITensor &tensor) {
int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer());
using ::onert::backend::acl_common::asAclFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
- ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
+ ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
KernelGenerator::KernelGenerator(
- const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
- : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN)
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
+ : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()),
+ _operations_ctx(graph.operations()), _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg(tensor_reg)
{
// DO NOTHING
}
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- // TODO Move this to IKernelGenerator
- // (all derivatives have the same implementation for this)
- assert(!_return_fn_seq);
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
- _return_fn_seq->enableDynamicShapeInferer(false);
-
- _current_layout = op_seq.getLayout();
- for (const auto &operation_idx : op_seq.operations())
- {
- const auto &node = _operations_ctx.at(operation_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
- }
+ auto ret = std::make_unique<exec::FunctionSequence>();
+ ret->enableDynamicShapeInferer(false);
+
+ const auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ ret->append(releaseFunction());
+ return ret;
}
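// [Editor's note] Illustration of the API change: visit(ir::OpSequence) used to emit one
// FunctionSequence per operation sequence, whereas generate(ir::OperationIndex) emits one per
// operation, driven per operation by genKernels() above:
//
//   for (auto op_ind : _data.op_order)
//     ret.emplace_back(op_ind, kernel_gen->generate(op_ind));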
void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
const auto NNApiInputs = 2;
if (node.getInputs().size() != NNApiInputs)
assert(_ctx.at(block_size_index).data());
auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>(
- ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
+ ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
{
fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE, act_info);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE, act_info);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
{
fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE, act_info);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE, act_info);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
{
fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
- act_info);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
+ act_info);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
{
fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
break;
}
default:
const auto ker_width = ker_shape.dim(2);
const auto stride = node.param().stride;
- const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
- ker_width, ker_height);
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
const auto activation = node.param().activation;
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
const auto act_info = acl_common::asActivationLayerInfo(activation);
auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
- ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
- ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclFunction(std::move(fn));
}
const auto stride = node.param().stride;
const auto dilation = node.param().dilation;
const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width,
- ker_height, dilation.width_factor, dilation.height_factor);
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
const auto dilation_info = acl_common::asDilation(dilation.width_factor, dilation.height_factor);
auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
- ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
- conv_info, multiplier, act_info, dilation_info);
+ ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+ conv_info, multiplier, act_info, dilation_info);
_return_fn = asAclFunction(std::move(fn));
}
}
auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
- std::vector<::arm_compute::ICLTensor *> input_tensors;
+ std::vector<const ::arm_compute::ICLTensor *> input_tensors;
for (auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
if (input_indexes.size() < 2)
{
- fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensors.at(0),
- output_tensor->handle());
+ ::arm_compute::ICLTensor *input_tensor =
+   _tensor_reg->getAclTensor(input_indexes.at(0))->handle();
+
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor, output_tensor->handle());
}
else
{
const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
- acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>(
- input_tensors, output_tensor->handle(), fixed_axis);
+ input_tensors, output_tensor->handle(), fixed_axis);
}
_return_fn = asAclFunction(std::move(fn));
const auto activation = node.param().activation;
if (node.param().weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32)
throw std::runtime_error(
- "KernelGenerator(acl_cl): FullyConnected 16x1Float32 weights is not supported.");
+ "KernelGenerator(acl_cl): FullyConnected 16x1Float32 weights is not supported.");
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
::arm_compute::CLFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
- std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
+ std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Reduce &node)
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
const auto acl_axes =
- acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
+ acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes,
keep_dims, output_tensor->handle());
}
const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout);
fn = acl_common::generateLayer<arm_compute::CLReduceOperation>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
}
_return_fn = asAclFunction(std::move(fn));
auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- output_tensor->handle(), beta);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), beta);
_return_fn = asAclFunction(std::move(fn));
}
{
auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
backend_layout)
- .value();
+ .value();
int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
starts[axis] = begin_value;
}
auto fn = acl_common::generateLayer<arm_compute::CLSlice>(
- inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
_return_fn = asAclFunction(std::move(fn));
}
{
auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
backend_layout)
- .value();
+ .value();
int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
starts[axis] = start_value;
const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank,
frontend_layout, backend_layout);
const auto shrink_axis_mask = acl_common::ReorderBits<int32_t>(
- node.param().shrink_axis_mask, input_rank, frontend_layout, backend_layout);
+ node.param().shrink_axis_mask, input_rank, frontend_layout, backend_layout);
::arm_compute::Coordinates starts_set;
::arm_compute::Coordinates ends_set;
}
auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
- inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
- begin_mask, end_mask, shrink_axis_mask);
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+ begin_mask, end_mask, shrink_axis_mask);
// Revert disabling applied dim_correction
if (inputData_tensor->dimension(0) == 1)
else
{
auto backend_pv =
- acl_common::getARMComputePermutationVector(rank, pv, frontend_layout, backend_layout);
+ acl_common::getARMComputePermutationVector(rank, pv, frontend_layout, backend_layout);
fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
ofm_tensor->handle(), backend_pv);
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
- const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
- node.param().op_type, node.param().alpha, node.param().beta);
+ const ::arm_compute::ActivationLayerInfo act_info =
+ acl_common::asActivationLayerInfo(node.param().op_type, node.param().alpha, node.param().beta);
auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
{
fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
- arm_compute::BinaryLogicalOperation::AND);
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
+ arm_compute::BinaryLogicalOperation::AND);
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
{
fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
{
fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
{
fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
default:
case ir::operation::ElementwiseUnary::Type::ABS:
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
- input_tensor->handle(), output_tensor->handle(), act_info);
+ input_tensor->handle(), output_tensor->handle(), act_info);
break;
}
case ir::operation::ElementwiseUnary::Type::CAST:
{
// TODO Support converting float to int32 as round down
fn = acl_common::generateLayer<arm_compute::CLCast>(
- input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+ input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
}
break;
}
case ir::operation::ElementwiseUnary::Type::SQRT:
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
- input_tensor->handle(), output_tensor->handle(), act_info);
+ input_tensor->handle(), output_tensor->handle(), act_info);
break;
}
default:
auto activation = node.param().activation;
auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>(
- ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
- epsilon);
+ ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+ epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::LSTM &node)
auto input1_tensor = _tensor_reg->getAclTensor(input1_index);
auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
- input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
+ input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
_return_fn = asAclFunction(std::move(fn));
}
if (offvalue.isConstant())
{
fn = acl_common::generateLayer<arm_compute::CLOneHot>(
- indices_tensor->handle(), onvalue_tensor->handle(), output_tensor->handle(),
- acl_common::asPixelValue(offvalue), static_cast<uint32_t>(depth), axis);
+ indices_tensor->handle(), onvalue_tensor->handle(), output_tensor->handle(),
+ acl_common::asPixelValue(offvalue), static_cast<uint32_t>(depth), axis);
}
else
{
auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx);
fn = acl_common::generateLayer<arm_compute::CLOneHot>(
- indices_tensor->handle(), onvalue_tensor->handle(), offvalue_tensor->handle(),
- output_tensor->handle(), static_cast<uint32_t>(depth), axis);
+ indices_tensor->handle(), onvalue_tensor->handle(), offvalue_tensor->handle(),
+ output_tensor->handle(), static_cast<uint32_t>(depth), axis);
}
if (output_tensor->dimension(0) == 1)
void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
+ node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
const auto ofm_index{node.getOutputs().at(0)};
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
const auto activation = node.param().activation;
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Permute &node)
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLScale>(
- ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
- ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
- ::arm_compute::SamplingPolicy::TOP_LEFT);
+ ifm_tensor->handle(), ofm_tensor->handle(),
+ ::arm_compute::ScaleKernelInfo{
+ ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
+ ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT});
_return_fn = asAclFunction(std::move(fn));
}
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLScale>(
- ifm_tensor->handle(), ofm_tensor->handle(),
+ ifm_tensor->handle(), ofm_tensor->handle(),
+ ::arm_compute::ScaleKernelInfo{
::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE,
- ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
+ ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT});
_return_fn = asAclFunction(std::move(fn));
}
{
const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
+ node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
+ node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
- hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+ hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclFunction(std::move(copy_layer));
auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
- hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
assert(_ctx.at(paddings_index).data());
auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>(
- ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
- ofm_tensor->handle());
+ ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+ ifm_tensor->handle(), ofm_tensor->handle(), block_size);
_return_fn = asAclFunction(std::move(fn));
}
auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
- values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+ values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
// TODO Support optional constant dimension that normalization would be performed on
const auto normalization_axis = _ctx.at(ifm_index).shape().rank() - 1;
int32_t radius =
- 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
- float alpha = 1.0f; // In the implementation to make alpha_ become 1
- float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
- float bias = 0.0f; // Don't offset the reduction.
+ 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
+ float alpha = 1.0f; // In the implementation to make alpha_ become 1
+ float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
+ float bias = 0.0f; // Don't offset the reduction.
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
radius, alpha, beta, bias, false);
auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
_return_fn = asAclFunction(std::move(fn));
}
auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
- lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
- output_tensor->handle(), hits_tensor->handle());
+ lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index);
auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>(
- ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
+ ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
if (node.param().padding.type == ir::PaddingType::VALID)
{
invalid_horizontal =
- ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
+ ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
- ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
- invalid_vertical);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
+ invalid_vertical);
_return_fn = asAclFunction(std::move(fn));
}
auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
{
const auto outputValues_index{node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_VALUES)};
const auto outputIndices_index{
- node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)};
+ node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)};
const auto inputData_index{node.getInputs().at(ir::operation::TopKV2::Input::INPUT)};
auto input_tensor = _tensor_reg->getAclTensor(inputData_index);
auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>(
- input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
+ input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
}
auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>(
- ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+ ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// Revert disabling applied dim_correction
if (ifm_tensor->dimension(0) == 1)
}
auto acl_axis =
- acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
: ::arm_compute::ReductionOperation::ARG_IDX_MIN;
auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayerEx>(
- ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type);
+ ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type);
_return_fn = asAclFunction(std::move(fn));
}
{
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{
- node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
+ node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
auto radius = node.param().radius;
auto alpha = node.param().alpha;
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
- ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
+ ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
_return_fn = asAclFunction(std::move(fn));
}
auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>(
- input_tensor->handle(), output_tensor->handle(), block_size);
+ input_tensor->handle(), output_tensor->handle(), block_size);
_return_fn = asAclFunction(std::move(fn));
}
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
auto fn =
- acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
+ acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclFunction(std::move(fn));
}
}
split_dim_revised =
- acl_common::ToARMComputeAxis(ifm_rank, split_dim_revised, frontend_layout, backend_layout)
- .value();
+ acl_common::ToARMComputeAxis(ifm_rank, split_dim_revised, frontend_layout, backend_layout)
+ .value();
fn->configure(ifm_tensor->handle(), size_split_tensor->handle(), split_dim_revised,
output_tensors, node.param().num_splits);
}
auto fn =
- acl_common::generateLayer<arm_compute::CLUnstack>(input_tensor->handle(), outputs, axis);
+ acl_common::generateLayer<arm_compute::CLUnstack>(input_tensor->handle(), outputs, axis);
// Revert disabling applied dim_correction
if (input_tensor->dimension(0) == 1)
auto input_type = _ctx.at(input_index).typeInfo();
auto data_type = acl_common::asDataType(input_type.type());
- auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset());
+ auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.zero_point());
const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
auto input = _tensor_reg->getAclTensor(input_index)->handle();
const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
const auto axis =
- acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
}
}
auto fn =
- acl_common::generateLayer<arm_compute::CLPadLayer>(input, output, padding_list, pixel_value);
+ acl_common::generateLayer<arm_compute::CLPadLayerEx>(input, output, padding_list, pixel_value);
// NOTE Do not revert disabling applied dim_correction for 4D.
// It would produce a mismatch of results due to an incorrect offset_first_element in
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
_return_fn = asAclFunction(std::move(fn));
}
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
_return_fn = asAclFunction(std::move(fn));
}
}
auto fn = acl_common::generateLayer<arm_compute::CLReverse>(
- ifm_tensor->handle(), ofm_tensor->handle(), axis_tensor->handle());
+ ifm_tensor->handle(), ofm_tensor->handle(), axis_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
#ifndef __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
-#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <backend/basic/KernelGeneratorBase.h>
-#include "ir/Operands.h"
#include "TensorBuilder.h"
#include "AclTensorRegistry.h"
#include "TensorManager.h"
namespace acl_cl
{
-class KernelGenerator : public cpu_common::KernelGeneratorBase
+class KernelGenerator : public basic::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
- void visit(const ir::OpSequence &) override;
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+private:
void visit(const ir::operation::ArgMinMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BinaryArithmetic &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
+ const ir::Layout _current_layout;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
- ir::Layout _current_layout;
};
} // namespace acl_cl
{
Optimizer::Optimizer(BackendContext *context)
- : _context{context},
- _tensor_builder{std::dynamic_pointer_cast<TensorBuilder>(context->tensor_builder)}
+ : _context{context}, _tensor_builder{
+ std::dynamic_pointer_cast<TensorBuilder>(context->tensor_builder)}
{
assert(context);
}
{
acl_common::AclSubTensorAnalyzer sa{*_context->graph()};
sa.setUsePadding();
- for (auto op_info : _context->operation_list())
- {
- auto &op = _context->graph()->operations().at(op_info.index);
- sa.setLayout(op_info.layout);
- op.accept(sa);
- }
+ _context->graph()->operations().iterate(
+ [&](const ir::OperationIndex &, const ir::Operation &op) {
+ sa.setLayout(_context->graph()->layout());
+ op.accept(sa);
+ });
_tensor_builder->parent_map(sa.releaseParentMap());
}
{
using TensorBuilder =
- acl_common::AclTensorBuilder<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
+ acl_common::AclTensorBuilder<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
} // namespace acl_cl
} // namespace backend
{
using MemoryManager =
- acl_common::AclMemoryManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
+ acl_common::AclMemoryManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
-using LinearMemoryManager = acl_common::AclLinearMemoryManager<
- operand::ICLTensor, operand::CLTensor, operand::CLSubTensor,
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator,
- ::arm_compute::MemoryGroup>;
+using LinearMemoryManager =
+ acl_common::AclLinearMemoryManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor,
+ ::arm_compute::MemoryManagerOnDemand,
+ ::arm_compute::PoolManager, ::arm_compute::BlobLifetimeManager,
+ ::arm_compute::CLBufferAllocator, ::arm_compute::MemoryGroup>;
using InternalBufferManager = acl_common::AclInternalBufferManager<
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator>;
+ ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
+ ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator>;
using TensorManager =
- acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
+ acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
inline TensorManager *createTensorManager(bool is_linear_executor)
{
CLSubTensor::CLSubTensor(ICLTensor *parent, const arm_compute::TensorShape &tensor_shape,
const arm_compute::Coordinates &coords, size_t rank, bool extend_parent)
- : _cl_sub_tensor(std::make_shared<arm_compute::CLSubTensor>(parent->handle(), tensor_shape,
- coords, extend_parent)),
- _rank{rank}
+ : ICLTensor{rank}, _cl_sub_tensor(std::make_shared<arm_compute::CLSubTensor>(
+ parent->handle(), tensor_shape, coords, extend_parent))
{
// DO NOTHING
}
CLSubTensor(ICLTensor *parent, const arm_compute::TensorShape &tensor_shape,
const arm_compute::Coordinates &coords, size_t rank, bool extend_parent = false);
-public:
- size_t num_dimensions() const final { return _rank; }
-
public:
const arm_compute::CLSubTensor *handle() const override;
arm_compute::CLSubTensor *handle() override;
private:
std::shared_ptr<arm_compute::CLSubTensor> _cl_sub_tensor;
- size_t _rank;
};
} // namespace operand
{
CLTensor::CLTensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses)
- : _cl_tensor(std::make_shared<arm_compute::CLTensor>()), _rank{rank}, _num_uses{num_uses}
+ : ICLTensor{rank}, _cl_tensor(std::make_shared<arm_compute::CLTensor>()), _num_uses{num_uses}
{
allocator()->init(info);
}
public:
CLTensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses);
-public:
- size_t num_dimensions() const final { return _rank; }
-
public:
const arm_compute::CLTensor *handle() const override;
arm_compute::CLTensor *handle() override;
private:
std::shared_ptr<arm_compute::CLTensor> _cl_tensor;
- size_t _rank;
size_t _num_uses;
};
class ICLTensor : public acl_common::IACLTensor
{
public:
+ ICLTensor(size_t rank) : IACLTensor{rank} {}
const arm_compute::ICLTensor *handle() const override = 0;
arm_compute::ICLTensor *handle() override = 0;
AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generateReLU(T_Tensor *ifm_alloc)
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
auto fn = std::make_unique<T_ActivationLayer>();
template <typename T_Tensor, typename T_ActivationLayer, typename T_ExecFunction>
std::unique_ptr<exec::IFunction>
AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generateReLU1(
- T_Tensor *ifm_alloc)
+ T_Tensor *ifm_alloc)
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
auto fn = std::make_unique<T_ActivationLayer>();
template <typename T_Tensor, typename T_ActivationLayer, typename T_ExecFunction>
std::unique_ptr<exec::IFunction>
AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generateReLU6(
- T_Tensor *ifm_alloc)
+ T_Tensor *ifm_alloc)
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
auto fn = std::make_unique<T_ActivationLayer>();
AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : cpu_common::ConstantInitializerBase{operands}, _tensor_reg{tensor_reg}
+ : _operands{operands}, _tensor_reg{tensor_reg}, _current_layout{ir::Layout::UNKNOWN}
{
// DO NOTHING
}
permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL);
}
+// NOTE Workaround for the 16-bit float type. This is enough here since only the byte size matters.
+using float16 = uint16_t;
+
+void AclConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
+{
+ // For only CONSTANTS
+ // TODO Add to check if tensor has been allocated
+ if (!obj.isConstant())
+ return;
+
+ const auto type = obj.typeInfo().type();
+ using ir::DataType;
+
+ switch (type)
+ {
+ case DataType::FLOAT32:
+ _init_map[index] = copyInit<float>;
+ break;
+ case DataType::INT32:
+ _init_map[index] = copyInit<int32_t>;
+ break;
+ case DataType::UINT32:
+ _init_map[index] = copyInit<uint32_t>;
+ break;
+ case DataType::BOOL8:
+ case DataType::QUANT_UINT8_ASYMM:
+ _init_map[index] = copyInit<uint8_t>;
+ break;
+ case DataType::QUANT_INT8_SYMM:
+ case DataType::QUANT_INT8_ASYMM:
+ _init_map[index] = copyInit<int8_t>;
+ break;
+ case DataType::FLOAT16:
+ _init_map[index] = copyInit<float16>;
+ break;
+ case DataType::INT64:
+ _init_map[index] = copyInit<int64_t>;
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+void AclConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
+{
+ // For only CONSTANTS
+ // TODO Add to check if tensor has been allocated
+ if (!obj.isConstant())
+ return;
+
+ const auto type = obj.typeInfo().type();
+ using ir::DataType;
+ using namespace std::placeholders;
+
+ switch (type)
+ {
+ case DataType::FLOAT32:
+ _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_layout);
+ break;
+ case DataType::INT32:
+ _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_layout);
+ break;
+ case DataType::UINT32:
+ _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_layout);
+ break;
+ case DataType::BOOL8:
+ case DataType::QUANT_UINT8_ASYMM:
+ _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_layout);
+ break;
+ case DataType::QUANT_INT8_SYMM:
+ case DataType::QUANT_INT8_ASYMM:
+ _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_layout);
+ break;
+ case DataType::FLOAT16:
+ _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_layout);
+ break;
+ case DataType::INT64:
+ _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_layout);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
} // namespace acl_common
} // namespace backend
} // namespace onert
#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
-#include <backend/cpu_common/ConstantInitializerBase.h>
-#include <ir/Operands.h>
#include "AclTensorRegistry.h"
+#include <unordered_map>
+#include <functional>
+
+#include <ir/Coordinates.h>
+#include <ir/Layout.h>
+#include <ir/Operand.h>
+#include <ir/Operands.h>
+#include <ir/OperationVisitor.h>
+#include <backend/ITensorRegistry.h>
+#include <util/logging.h>
+
namespace onert
{
namespace backend
namespace acl_common
{
-class AclConstantInitializer : public cpu_common::ConstantInitializerBase
+template <typename T>
+static void Init(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj, const bool copy,
+ const onert::ir::Layout frontend_layout = onert::ir::Layout::UNKNOWN)
+{
+ const auto shape = model_obj.shape();
+ assert(model_obj.data());
+ auto base = reinterpret_cast<const T *>(model_obj.data()->base());
+
+ obj.access([&](::onert::backend::ITensor &tensor) {
+ switch (shape.rank())
+ {
+ case 0:
+ {
+ assert(model_obj.data()->size() == sizeof(T));
+ const auto value = *reinterpret_cast<const T *>(base);
+ T *into = reinterpret_cast<T *>(tensor.buffer());
+ *into = value;
+ break;
+ }
+ case 1:
+ {
+ auto vec_size = shape.dim(0);
+ for (int32_t n = 0; n < vec_size; ++n)
+ {
+ const T *from = reinterpret_cast<const T *>(base) + n;
+ const auto value = *from;
+
+ T *into = reinterpret_cast<T *>(tensor.buffer()) + n;
+
+ *into = value;
+ }
+ break;
+ }
+ case 2:
+ {
+ const int32_t copy_len = shape.dim(1);
+
+ for (auto i = 0; i < shape.dim(0); ++i)
+ {
+ ::onert::ir::Coordinates coords{i, 0};
+ memcpy(tensor.buffer() + tensor.calcOffset(coords), base + i * copy_len,
+ copy_len * sizeof(T));
+ }
+ break;
+ }
+ case 3:
+ {
+ const int32_t width = shape.dim(1);
+ const int32_t copy_len = shape.dim(2);
+
+ for (auto i = 0; i < shape.dim(0); ++i)
+ {
+ for (auto j = 0; j < shape.dim(1); ++j)
+ {
+ ::onert::ir::Coordinates coords{i, j, 0};
+ memcpy(tensor.buffer() + tensor.calcOffset(coords),
+ base + i * width * copy_len + j * copy_len, copy_len * sizeof(T));
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ const int32_t height = shape.dim(1);
+ const int32_t width = shape.dim(2);
+ const int32_t copy_len = shape.dim(3);
+ for (auto i = 0; i < shape.dim(0); ++i)
+ {
+ for (auto j = 0; j < shape.dim(1); ++j)
+ {
+ for (auto k = 0; k < shape.dim(2); ++k)
+ {
+ if (copy)
+ {
+ ::onert::ir::Coordinates coords{i, j, k, 0};
+ memcpy(tensor.buffer() + tensor.calcOffset(coords),
+ base + i * height * width * copy_len + j * width * copy_len + k * copy_len,
+ copy_len * sizeof(T));
+ }
+ else
+ {
+ for (auto l = 0; l < shape.dim(3); ++l)
+ {
+ const auto coords =
+ ::onert::ir::convertCoordinates({i, j, k, l}, frontend_layout, tensor.layout());
+ T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset(coords));
+ T value = *(base + i * height * width * copy_len + j * width * copy_len +
+ k * copy_len + l);
+ *into = value;
+ }
+ }
+ }
+ }
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error{"Not yet supported"};
+ }
+ });
+}
+
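+// copyInit always performs a plain copy; permuteInit copies only when the frontend layout
+// already matches the tensor layout, otherwise Init<T> falls back to the element-wise
+// permutation path (rank-4 case above).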
+template <typename T>
+void copyInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj)
+{
+ Init<T>(model_obj, obj, true);
+}
+
+template <typename T>
+void permuteInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj,
+ const onert::ir::Layout frontend_layout)
{
+ const bool copy = frontend_layout == obj.layout();
+ Init<T>(model_obj, obj, copy, frontend_layout);
+}
+
+class AclConstantInitializer : public ir::OperationVisitor
+{
+public:
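+ // Runs every registered initializer to fill constant operand data into its backend tensor,
+ // then clears the map so the data is written only once.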
+ void run()
+ {
+ assert(_tensor_reg);
+ for (const auto &it : _init_map)
+ {
+ const auto &ind = it.first;
+ const auto &fn = it.second;
+
+ const auto &model_obj = _operands.at(ind);
+ auto tensor_obj = _tensor_reg->getNativeITensor(ind);
+ assert(tensor_obj != nullptr);
+ fn(model_obj, *tensor_obj);
+ VERBOSE(FillOperandData) << "Fill data for operand " << ind << std::endl;
+ }
+ _init_map.clear();
+ }
+
public:
AclConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg);
+public:
+ using Initializer = std::function<void(const ir::Operand &, backend::ITensor &)>;
+
+public:
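+ // The default initializer permutes data whenever the backend layout differs from the
+ // frontend layout (see registerPermuteInitializer).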
+ void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
+ {
+ registerPermuteInitializer(index, obj);
+ }
+ void registerCopyInitializer(const ir::OperandIndex &index, const ir::Operand &obj);
+ void registerPermuteInitializer(const ir::OperandIndex &index, const ir::Operand &obj);
+
+public:
+ void setLayout(ir::Layout layout) { _current_layout = layout; }
+ bool exist(const ir::OperandIndex &ind) { return _init_map.find(ind) != _init_map.end(); }
+
public:
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::Conv2D &) override;
void copyInputInitialize(const ir::Operation &node, uint32_t index);
void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-private:
- std::shared_ptr<ITensorRegistry> tensor_registry() const final { return _tensor_reg; }
-
protected:
+ const ir::Operands &_operands;
std::shared_ptr<ITensorRegistry> _tensor_reg;
+ std::unordered_map<ir::OperandIndex, Initializer> _init_map;
+ ir::Layout _current_layout;
};
} // namespace acl_common
#include <arm_compute/runtime/IMemoryManager.h>
#include <cassert>
#include <memory>
-#include <backend/IMemoryManager.h>
namespace onert
{
/**
* @brief Interface for InternalBufferManager which has ::arm_compute::IMemoryManager pointer
*/
-struct IInternalBufferManager : public backend::IMemoryManager
+struct IInternalBufferManager
{
virtual ~IInternalBufferManager() = default;
+ virtual void allocate(void) = 0;
+ virtual void deallocate(void) = 0;
+
/**
* @brief Get shared_ptr of ::arm_compute::IMemoryManager
*/
void enableDimCorrection(IACLTensor *tensor)
{
- size_t input_rank = tensor->num_dimensions();
+ size_t input_rank = tensor->getShape().rank();
const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
- .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), true);
+ .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), true);
}
void disableDimCorrection(IACLTensor *tensor)
{
- size_t input_rank = tensor->num_dimensions();
+ size_t input_rank = tensor->getShape().rank();
const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
- .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), false);
+ .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), false);
}
template <typename Layer, typename... Args>
// TODO Support dynamic rnn
// TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
const auto cell_threshold = node.param().cell_threshold;
const auto projection_threshold = node.param().projection_threshold;
bool has_input_to_input_weights = operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
operands.at(input_to_input_weights_index).shape().dim(1) != 0;
bool has_recurrent_to_input_weights =
- operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
bool has_cell_to_forget_weights = operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
bool has_cell_to_output_weights = operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
bool has_projection_weights = operands.at(projection_weights_index).shape().dim(0) != 0 &&
auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index);
auto input_to_output_weights_tensor = tensor_reg->getAclTensor(input_to_output_weights_index);
auto recurrent_to_forget_weights_tensor =
- tensor_reg->getAclTensor(recurrent_to_forget_weights_index);
+ tensor_reg->getAclTensor(recurrent_to_forget_weights_index);
auto recurrent_to_cell_weights_tensor = tensor_reg->getAclTensor(recurrent_to_cell_weights_index);
auto recurrent_to_output_weights_tensor =
- tensor_reg->getAclTensor(recurrent_to_output_weights_index);
+ tensor_reg->getAclTensor(recurrent_to_output_weights_index);
auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index);
auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index);
if (has_cifg_param)
{
auto input_to_input_weights_tensor =
- tensor_reg->getAclTensor(input_to_input_weights_index); // optional
+ tensor_reg->getAclTensor(input_to_input_weights_index); // optional
auto recurrent_to_input_weights_tensor =
- tensor_reg->getAclTensor(recurrent_to_input_weights_index); // optional
+ tensor_reg->getAclTensor(recurrent_to_input_weights_index); // optional
auto cell_to_input_weights_handle =
- has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index)->handle()
- : nullptr; // optional (non-cifg && peephole)
+ has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index)->handle()
+ : nullptr; // optional (non-cifg && peephole)
auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index); // optional
lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
recurrent_to_input_weights_tensor->handle(),
if (has_peephole_param)
{
auto cell_to_forget_weights_tensor =
- tensor_reg->getAclTensor(cell_to_forget_weights_index); // optional
+ tensor_reg->getAclTensor(cell_to_forget_weights_index); // optional
auto cell_to_output_weights_tensor =
- tensor_reg->getAclTensor(cell_to_output_weights_index); // optional
+ tensor_reg->getAclTensor(cell_to_output_weights_index); // optional
lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
cell_to_output_weights_tensor->handle());
}
{
auto projection_weights_tensor = tensor_reg->getAclTensor(projection_weights_index); // optional
auto projection_bias_handle = has_projection_bias
- ? tensor_reg->getAclTensor(projection_bias_index)->handle()
- : nullptr; // optional
+ ? tensor_reg->getAclTensor(projection_bias_index)->handle()
+ : nullptr; // optional
lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
}
auto fn = generateLayer<T_ACLLayer>(
- input_tensor->handle(), input_to_forget_weights_tensor->handle(),
- input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
- recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
- recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
- cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
- output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
- scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
- cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, cell_clip,
- projection_clip);
+ input_tensor->handle(), input_to_forget_weights_tensor->handle(),
+ input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
+ recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
+ recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
+ cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), output_state_in_tensor->handle(),
+ cell_state_in_tensor->handle(), scratch_buffer_tensor->handle(),
+ output_state_out_tensor->handle(), cell_state_out_tensor->handle(), output_tensor->handle(),
+ lstm_params, act_info, cell_clip, projection_clip);
return std::make_unique<T_FunctionWrapper>(std::move(fn));
}
const auto input_rank = operands.at(input_index).shape().rank();
const auto output_size =
- operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
UNUSED_RELEASE(output_size);
assert(bias_index.undefined() || operands.at(bias_index).shape().dim(0) == output_size);
assert(operands.at(weight_index).shape().dim(0) == output_size);
const auto batch_size =
- operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
const auto input_size =
- operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
+ operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
// Check for reshaping input's shape into rank-2
bool needs_reshape = false;
}
auto fn = generateLayer<T_ACLLayer>(
- tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- weight_tensor->handle(), bias_tensor != nullptr ? bias_tensor->handle() : nullptr,
- output_tensor->handle(), needs_reshape,
- asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type);
+ tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weight_tensor->handle(), bias_tensor != nullptr ? bias_tensor->handle() : nullptr,
+ output_tensor->handle(), needs_reshape,
+ asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type);
return std::make_unique<T_FunctionWrapper>(std::move(fn));
}
const auto kw = node.param().kw;
const auto stride = node.param().stride;
const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
VERBOSE(Pool2DParam) << "IFM_H: " << ifm_shape.H << std::endl;
VERBOSE(Pool2DParam) << "IFM_W: " << ifm_shape.W << std::endl;
auto ifm_tensor = tensor_reg->getAclTensor(ifm_index);
::arm_compute::PoolingLayerInfo info{
- pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
- asPadStrideInfo(padding, stride), true /* exclude_padding */};
+ pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
+ asPadStrideInfo(padding, stride), true /* exclude_padding */};
auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info);
#include "ir/OperandIndexMap.h"
#include "util/logging.h"
-namespace
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
{
template <typename T_MemoryManager, typename T_PoolManager, typename T_LifetimeManager>
std::shared_ptr<T_PoolManager> pool_mgr = std::make_shared<T_PoolManager>();
std::shared_ptr<T_MemoryManager> mem_mgr =
- std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr);
+ std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr);
return mem_mgr;
}
-} // namespace
-
-namespace onert
-{
-namespace backend
-{
-namespace acl_common
-{
-
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor, typename T_MemoryManager,
typename T_PoolManager, typename T_LifetimeManager, typename T_Allocator,
typename T_MemoryGroup>
{
public:
AclLinearMemoryManager()
- : _allocator{nullptr},
- _io_manager{createMemoryManager<T_MemoryManager, T_PoolManager, T_LifetimeManager>()},
- _io_group{std::make_shared<T_MemoryGroup>(_io_manager)}
+ : _allocator{nullptr},
+ _io_manager{createMemoryManager<T_MemoryManager, T_PoolManager, T_LifetimeManager>()},
+ _io_group{std::make_shared<T_MemoryGroup>(_io_manager)}
{
// DO NOTHING
}
#include <arm_compute/runtime/IMemoryManager.h>
#include <cassert>
-#include "backend/IMemoryManager.h"
#include "ir/OperandIndexMap.h"
#include "Convert.h"
#include "util/logging.h"
namespace acl_common
{
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class AclMemoryManager : public backend::IMemoryManager
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclMemoryManager
{
public:
AclMemoryManager()
virtual ~AclMemoryManager() = default;
- void allocate(void) override
+ virtual void allocate(void)
{
for (const auto &tensor_entry : _tensors)
{
}
}
- void deallocate(void) override
+ virtual void deallocate(void)
{
for (const auto &tensor_entry : _tensors)
{
}
}
- virtual void startLifetime(const ir::OperandIndex &) { /* DO NOTHING */}
- virtual void finishLifetime(const ir::OperandIndex &) { /* DO NOTHING */}
+ virtual void startLifetime(const ir::OperandIndex &)
+ { /* DO NOTHING */
+ }
+ virtual void finishLifetime(const ir::OperandIndex &)
+ { /* DO NOTHING */
+ }
void buildTensor(const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank,
size_t num_uses)
bool extent_parent)
{
auto subtensor =
- std::make_shared<T_SubTensor>(parent_tensor.get(), shape, coordinates, rank, extent_parent);
+ std::make_shared<T_SubTensor>(parent_tensor.get(), shape, coordinates, rank, extent_parent);
_subtensors[child_ind] = subtensor;
}
coordinate_info.set(axis, axis_point);
_parent_map.emplace(
- input_index, acl_common::ParentInfo{output_index, _current_op_layout, coordinate_info});
+ input_index, acl_common::ParentInfo{output_index, _current_op_layout, coordinate_info});
axis_point += input_shape.dim(axis);
}
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
T_AclTensorManager *tensor_mgr)
- : _operands{operands}, _tensor_mgr{tensor_mgr}
+ : _operands{operands}, _tensor_mgr{tensor_mgr}
{
assert(_tensor_mgr);
}
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerTensorInfo(
- const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout backend_layout)
+ const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout backend_layout)
{
assert(_tensor_mgr->constTensors().size() == 0);
assert(_tensor_mgr->nonconstTensors().size() == 0);
offset = {offset[0], offset[3], offset[1], offset[2]};
}
else if (_operands.at(parent_index).shape().rank() >= 4 &&
- frontend_layout == ir::Layout::NHWC && backend_layout == ir::Layout::NCHW)
+ frontend_layout == ir::Layout::NCHW && backend_layout == ir::Layout::NHWC)
{
// Permutation changing layout beyond 4-D is not supported yet
const auto parent_rank = _operands.at(parent_index).shape().rank();
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::isRegistered(
- const ir::OperandIndex &ind) const
+ const ir::OperandIndex &ind) const
{
return _tensor_info_map.find(ind) != _tensor_info_map.end();
}
std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map;
std::function<ir::OperandIndex &(ir::OperandIndex)> find_root =
- [&](ir::OperandIndex ind) -> ir::OperandIndex & {
+ [&](ir::OperandIndex ind) -> ir::OperandIndex & {
ir::OperandIndex &ret = root_map[ind];
// We know the root parent value already
const auto &info = entry.second;
const auto &backend_layout = _tensor_layout_map[ind];
auto tensor_info =
- asTensorInfo(info.shape(), info.typeInfo(), ir::Layout::UNKNOWN, backend_layout, true);
+ asTensorInfo(info.shape(), info.typeInfo(), ir::Layout::UNKNOWN, backend_layout, true);
_tensor_mgr->buildTensor(ind, tensor_info, info.shape().rank(), info.isConstant(),
_uses_count_map[ind]);
}
// Subtensors
assert(_tensor_mgr->nonconstSubtensors().size() == 0);
// TODO Iterate `_parent_map` instead, once the optimizer bug is fixed
- // `Optimizer` iterates the entire OpSequences, so there is a bug if iterating _parent_map
+ // `Optimizer` iterates over all Operations, so iterating `_parent_map` here would be buggy
for (auto &entry : _tensor_info_map)
{
auto ind = entry.first;
assert(parent_tensor != nullptr);
// Child's type should be same with parent
- assert(tensor_info.typeInfo().offset() ==
+ assert(tensor_info.typeInfo().zero_point() ==
parent_tensor->info()->quantization_info().uniform().offset);
assert(tensor_info.typeInfo().scale() ==
parent_tensor->info()->quantization_info().uniform().scale);
auto shape = asTensorShape(tensor_info.shape(), ir::Layout::UNKNOWN, backend_layout, true);
::arm_compute::Coordinates coordinates =
- asTensorCoordinate(parent_info.coordinates, ir::Layout::UNKNOWN, backend_layout);
+ asTensorCoordinate(parent_info.coordinates, ir::Layout::UNKNOWN, backend_layout);
_tensor_mgr->buildSubtensor(parent, current, shape, coordinates, tensor_info.shape().rank(),
true);
stack.pop();
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::areSubTensorsOf(
- const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq)
+ const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq)
{
for (auto &cand : seq)
{
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::isSubTensorOf(
- const ir::OperandIndex &parent, const ir::OperandIndex &child)
+ const ir::OperandIndex &parent, const ir::OperandIndex &child)
{
auto itr = _parent_map.find(child);
if (itr == _parent_map.end())
#include <arm_compute/runtime/IMemoryManager.h>
-#include "backend/ITensorManager.h"
#include "AclMemoryManager.h"
#include "AclInternalBufferManager.h"
#include "ir/OperandIndexMap.h"
namespace acl_common
{
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class AclTensorManager : public backend::ITensorManager
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorManager
{
public:
using T_AclMemoryManager = AclMemoryManager<T_ITensor, T_Tensor, T_SubTensor>;
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::AclTensorManager(
- T_AclMemoryManager *const_mgr, T_AclMemoryManager *nonconst_mgr,
- IInternalBufferManager *inter_mgr)
- : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr}, _inter_mgr{inter_mgr}
+ T_AclMemoryManager *const_mgr, T_AclMemoryManager *nonconst_mgr,
+ IInternalBufferManager *inter_mgr)
+ : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr}, _inter_mgr{inter_mgr}
{
// DO NOTHING
}
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::buildTensor(
- const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank, bool as_const,
- size_t num_uses)
+ const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank, bool as_const,
+ size_t num_uses)
{
assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
if (as_const)
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::buildSubtensor(
- const ir::OperandIndex &parent, const ir::OperandIndex &child,
- const ::arm_compute::TensorShape &shape, const ::arm_compute::Coordinates &coordinates,
- size_t rank, bool extent_parent)
+ const ir::OperandIndex &parent, const ir::OperandIndex &child,
+ const ::arm_compute::TensorShape &shape, const ::arm_compute::Coordinates &coordinates,
+ size_t rank, bool extent_parent)
{
assert(_ind_to_mgr.find(child) == _ind_to_mgr.end());
std::shared_ptr<T_ITensor> parent_tensor = findTensorAsParent(parent);
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::iterate(
- const std::function<void(const ir::OperandIndex &)> &fn)
+ const std::function<void(const ir::OperandIndex &)> &fn)
{
for (auto it : _nonconst_mgr->tensors())
fn(it.first);
// used in several nodes.
if (tensor->handle() && !tensor->handle()->is_used() && tensor->num_uses() < 2)
{
- VERBOSE(AclTensorManager) << "Tensor #" << ind.value()
+ VERBOSE(AclTensorManager) << "Tensor " << ind
<< " will be deallocated as an unused constant tensor" << std::endl;
tensor->allocator()->free();
tensor.reset();
bool apply_dim_correction)
{
::arm_compute::TensorInfo info(
- asTensorShape(shape, frontend_layout, backend_layout, apply_dim_correction), 1,
- asDataType(typeInfo.type()), asQuantizationInfo(typeInfo.scale(), typeInfo.offset()));
+ asTensorShape(shape, frontend_layout, backend_layout, apply_dim_correction), 1,
+ asDataType(typeInfo.type()), asQuantizationInfo(typeInfo.scale(), typeInfo.zero_point()));
info.set_data_layout(asDataLayout(backend_layout));
return info;
}
return ::arm_compute::ActivationLayerInfo{};
case ir::Activation::RELU:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
case ir::Activation::RELU1:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
case ir::Activation::RELU6:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
// Cases for activation of LSTM.
case ir::Activation::TANH:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
case ir::Activation::SIGMOID:
// NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
// TODO In the ACL and nnapi specs, Logistic currently always uses L=1, k=1, x0=0 (always
// sigmoid) regardless of the parameter values.
// If ACL ever supports a non-sigmoid logistic, the param values should be fixed here.
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
default:
throw std::runtime_error{"Not supported internal activation, yet"};
break;
if (alpha == ir::operation::ElementwiseActivation::infinity)
{
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
}
else
{
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
}
}
else
{
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
}
case ir::operation::ElementwiseActivation::Type::TANH:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
case ir::operation::ElementwiseActivation::Type::LOGISTIC:
// NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
// TODO In the ACL and nnapi specs, Logistic currently always uses L=1, k=1, x0=0 (always
// sigmoid) regardless of the parameter values.
// If ACL ever supports a non-sigmoid logistic, the param values should be fixed here.
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
default:
throw std::runtime_error{"Not supported internal elementwise activation, yet"};
break;
namespace acl_common
{
-size_t IACLTensor::dimension(size_t index) const
-{
- // Assume that the front is higher dimensional.
- // i.g. N: 0, C: 1, H: 2, W: 3 for NCHW layout
- // NOTE This tensor must not be applied dim correction
- auto rank = num_dimensions();
- rank = rank == 0 ? 1 : rank;
- assert(rank > index);
- const ARMComputeAxis reversed{(static_cast<uint32_t>(rank - index) - 1)};
- return info()->dimension(reversed.value());
-}
-
size_t IACLTensor::calcOffset(const ir::Coordinates &coords) const
{
- auto rank = num_dimensions();
+ auto rank = _rank;
rank = rank == 0 ? 1 : rank;
- assert(rank == coords.size());
+ assert(static_cast<size_t>(rank) == coords.size());
::arm_compute::Coordinates acl_coords;
- for (uint32_t i = 0; i < rank; ++i)
+ for (size_t i = 0; i < rank; ++i)
{
const ARMComputeAxis reversed{static_cast<uint32_t>((rank - i) - 1)};
acl_coords.set(reversed.value(), coords[i]);
return info()->quantization_info().uniform().scale;
}
-int32_t IACLTensor::data_offset() const
+int32_t IACLTensor::data_zero_point() const
{
// FIXME What if quantization info is non-uniform?
return info()->quantization_info().uniform().offset;
}
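+// Per-channel quantization parameters are not provided here; IACLTensor only exposes the
+// uniform (per-tensor) scale and zero point above.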
+const std::vector<float> &IACLTensor::data_scales() const
+{
+ throw std::runtime_error("IACLTensor::data_scales() is not supported.");
+}
+
+const std::vector<int32_t> &IACLTensor::data_zero_points() const
+{
+ throw std::runtime_error("IACLTensor::data_zero_points() is not supported.");
+}
+
} // namespace acl_common
} // namespace backend
} // namespace onert
#include <backend/ITensor.h>
#include <arm_compute/core/ITensor.h>
+#include "Swizzle.h"
namespace onert
{
IACLTensor(IACLTensor &&) = default;
IACLTensor &operator=(IACLTensor &&) = default;
+ IACLTensor(size_t rank) : _rank{rank} {}
+
public:
uint8_t *buffer() const final { return handle()->buffer(); }
size_t total_size() const final { return info()->total_size(); }
- size_t dimension(size_t index) const final;
size_t calcOffset(const ir::Coordinates &coords) const final;
ir::Layout layout() const final;
ir::DataType data_type() const final;
float data_scale() const override;
- int32_t data_offset() const override;
+ int32_t data_zero_point() const override;
+ const std::vector<float> &data_scales() const override;
+ const std::vector<int32_t> &data_zero_points() const override;
bool has_padding() const override { return info()->has_padding(); }
bool is_dynamic() const override { return false; }
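+ // Rebuilds the onert-ordered shape from ACL's reversed axis order via dimension() below.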
+ ir::Shape getShape() const override
+ {
+ onert::ir::Shape shape(num_dimensions());
+ for (uint32_t d = 0; d < num_dimensions(); d++)
+ shape.dim(d) = dimension(d);
+ return shape;
+ }
public:
virtual const arm_compute::ITensor *handle() const = 0;
const arm_compute::ITensorInfo *info() const { return handle()->info(); }
arm_compute::ITensorInfo *info() { return handle()->info(); }
+
+ size_t dimension(size_t index) const
+ {
+ // Assume that the front axes are the higher dimensions.
+ // e.g. N: 0, C: 1, H: 2, W: 3 for NCHW layout
+ // NOTE Dim correction must not have been applied to this tensor
+ auto rank = _rank;
+ rank = rank == 0 ? 1 : rank;
+ assert(rank > index);
+ const ARMComputeAxis reversed{(static_cast<uint32_t>(rank - index) - 1)};
+ return info()->dimension(reversed.value());
+ }
+ size_t num_dimensions() const { return _rank; }
+
+protected:
+ size_t _rank; // Actual rank (reflects extended rank)
};
} // namespace acl_common
}
::arm_compute::PermutationVector ACL_PV =
- ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]};
+ ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]};
ACL_PV.set_num_dimensions(rank);
return ACL_PV;
for (int32_t i = numOfBits - 1; i >= 0; --i)
{
const uint32_t toShift =
- numOfBits - ToARMComputeAxis(numOfBits, i, org_layout, acl_layout).value() - 1;
+ numOfBits - ToARMComputeAxis(numOfBits, i, org_layout, acl_layout).value() - 1;
out += ((in & 1) << toShift);
in >>= 1;
}
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<backend::BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
- bool is_linear_executor) const override
+ std::unique_ptr<backend::BackendContext> newContext(ContextData &&data) const override
{
- const auto &operands = graph.operands();
- const auto &operations = graph.operations();
- auto context = std::make_unique<acl_neon::BackendContext>(this, &graph);
- auto tm = createTensorManager(is_linear_executor);
+ const auto &graph = *data.graph;
+ const auto &operands = data.graph->operands();
+ // Read this flag before std::move(data); using `data` after the move is a use-after-move.
+ const auto is_linear_executor = data.is_linear_executor;
+ auto context = std::make_unique<acl_neon::BackendContext>(this, std::move(data));
+ auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr);
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
}
void BackendContext::initConsts()
{
- for (auto &op : operation_list())
- {
- constant_initializer->setLayout(op.layout);
- graph()->operations().at(op.index).accept(*constant_initializer);
- }
+ _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
+ constant_initializer->setLayout(graph()->layout());
+ op.accept(*constant_initializer);
+ });
- for (auto ind : operand_list())
- {
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (_data.external_operands.contains(ind) || !operand.isConstant())
+ return;
const auto &obj = graph()->operands().at(ind);
if (obj.isConstant() && !constant_initializer->exist(ind))
{
constant_initializer->registerDefaultInitializer(ind, obj);
}
- }
+ });
constant_initializer->run();
}
-void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+void BackendContext::planTensors()
{
ir::OperandIndexMap<uint32_t> uses_map;
ir::OperandIndexMap<uint32_t> def_map;
ir::OperandIndexSequence constants;
// Prepare scanning
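+ // uses_map / def_map track the remaining use/def counts per operand; the scan below calls
+ // notifyFirstUse() when an operand is defined and notifyLastUse() once its use count drops
+ // to zero.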
- for (auto ind : operand_list())
- {
- const auto &obj = graph()->operands().at(ind);
- const auto &li = lower_info.operand.at(ind);
- if (li->def_factors().getOnlyElement().backend() != backend())
- continue;
-
- // Ignore unused tensor
- if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
- {
- VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process."
- << std::endl;
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (_data.external_operands.contains(ind))
return;
- }
uses_map[ind] = obj.getUses().size();
def_map[ind] = obj.getDef().valid() ? 1 : 0;
if (obj.isConstant())
constants.append(ind);
- auto factor = li->def_factors().getOnlyElement();
if (!tensor_builder->isRegistered(ind))
{
- // These tensors do not exist in any op_seq (No use and def)
+ // These tensors do not exist in any operation (No use and def)
const auto info = obj.info();
- const auto backend_layout = factor.layout();
+ const auto layout = _data.operand_layouts.at(ind);
// TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, backend_layout);
+ tensor_builder->registerTensorInfo(ind, info, layout);
}
- }
+ });
// Start scanning to do notify{First|Last}Use for each tensor
// 1. Scan DEF of outputs. If the DEF, allocate it
// 2. Scan DEF of inputs. If variable tensor, allocate it
// 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- for (const auto op_seq_ind : order)
+ for (const auto op_ind : _data.op_order)
{
- const auto &op_seq = op_seqs.at(op_seq_ind);
- for (const auto &op_idx : op_seq.operations())
- {
- auto &op = graph()->operations().at(op_idx);
- auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
- auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_inputs =
+ graph()->operations().at(op_ind).getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = graph()->operations().at(op_ind).getOutputs() | ir::Remove::DUPLICATED |
+ ir::Remove::UNDEFINED;
- // Define outputs
- for (const auto &ind : op_outputs)
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
{
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(def_map.find(ind) != def_map.end());
- if (def_map[ind])
- {
- def_map[ind] = 0;
- tensor_builder->notifyFirstUse(ind);
- }
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
}
+ }
- // Scan variable tensors
- // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
- // non-constant because of less memory usage by memory planning in here
- for (const auto &ind : op_inputs)
+ // Scan variable tensors
+ // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
+ // non-constant because of less memory usage by memory planning in here
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph()->operands().at(ind);
+ if (operand.info().isVariable())
{
- if (!tensor_builder->isRegistered(ind))
- continue;
- const auto &operand = graph()->operands().at(ind);
- if (operand.info().isVariable())
- {
- // The variable tensor with buffer is not supported yet
- assert(operand.data() == nullptr);
- assert(operand.getUses().size() == 1 && !operand.getDef().valid());
- assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
- lower_info.operand.at(ind)->use_factors().size() == 1);
- assert(uses_map[ind] == 1 && def_map[ind] == 0);
- tensor_builder->notifyFirstUse(ind);
- }
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
}
+ }
- for (const auto &ind : op_inputs)
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
{
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(uses_map.find(ind) != uses_map.end());
- assert(uses_map[ind] > 0);
- uses_map[ind]--;
- if (uses_map[ind] == 0)
- {
- // plan for deallocation of static tensornode
- tensor_builder->notifyLastUse(ind);
- }
+ // plan for deallocation of static tensornode
+ tensor_builder->notifyLastUse(ind);
}
}
}
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (uses_map[ind] == 0)
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ });
+
// Dispose and validate
for (const auto &ind : constants)
{
}
assert(
- std::all_of(uses_map.begin(), uses_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
assert(
- std::all_of(def_map.begin(), def_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
}
-ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info)
+ITensorRegistry *BackendContext::genTensors()
{
optimizer->optimize();
- for (const auto op_seq_ind : order)
- {
- const auto &op_seq = op_seqs.at(op_seq_ind);
- auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
- ir::Remove::DUPLICATED;
- for (const auto op_ind : op_seq)
- {
- bool op_assigned = [&]() {
- for (auto &op_info : operation_list())
- if (op_info.index == op_ind)
- return true;
- return false;
- }();
- if (!op_assigned)
- continue;
+ graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (external_operands().contains(ind))
+ return;
- const auto &op = graph()->operations().at(op_ind);
- for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED)
- {
- if (!tensor_builder->isRegistered(index) && !model_io.contains(index) &&
- find(operand_list().begin(), operand_list().end(), index) != operand_list().end())
- {
- const auto &operand_lower_info =
- lower_info.operand.at(index)->def_factors().getOnlyElement();
-
- // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
- // op.getOutputs() of permute (CPU) returns tensor A
- // but tensor A belongs to the backend of acl_cl.
- // So, we have to make this tensor NOT registered for CPU.
- if (operand_lower_info.backend() != backend())
- continue;
-
- const auto &obj = graph()->operands().at(index);
- const auto frontend_layout = op_seq.getLayout();
- const auto backend_layout = operand_lower_info.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
- }
- }
- }
+ const auto frontend_layout = graph()->layout();
+ const auto backend_layout = operand_layouts().at(ind);
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(ind, backend_info, backend_layout);
+ });
// TODO Get compiler options from compiler, and use it rather than getting it from Env
if (util::getConfigString(util::config::EXECUTOR) == "Linear")
{
- planTensors(order, op_seqs, lower_info);
+ planTensors();
}
else
{
// For the executors that does not have fixed linear execution order:
// To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto ind : operand_list())
- {
+ graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
if (tensor_builder->isRegistered(ind))
tensor_builder->notifyFirstUse(ind);
- }
+ });
}
tensor_builder->prepare();
return tensor_registry.get();
}
-FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs)
+FunctionMap BackendContext::genKernels()
{
FunctionMap ret;
- for (auto op_seq_ind : order)
+ for (auto op_ind : _data.op_order)
{
- const auto &op_seq = op_seqs.at(op_seq_ind);
- bool assigned = [&]() {
- for (auto op_info : operation_list())
- if (op_seq.exist(op_info.index))
- return true;
- return false;
- }();
- if (!assigned)
- continue;
- auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
- ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
}
tensor_builder->allocate();
initConsts();
// NOTE For memory optimization, we want to free some operand data
- for (auto ind : operand_list())
- {
- // TODO Remove const_cast
- auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
- obj.releaseData();
- }
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
for (auto &it : ret)
{
return ret;
}
-} // namespace neon
+} // namespace acl_neon
} // namespace backend
} // namespace onert
class BackendContext : public onert::backend::BackendContext
{
public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
+ BackendContext(const Backend *backend, ContextData &&data,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{
+ kernel_gen}
{
}
- ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info) override;
- FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs) override;
+ ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
private:
void initConsts();
- void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+ void planTensors();
public:
// TODO Make it private
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : acl_common::AclConstantInitializer{operands, tensor_reg}
+ : acl_common::AclConstantInitializer{operands, tensor_reg}
{
// DO NOTHING
}
{
const int32_t value = base[i * 2 + j];
int32_t *into = reinterpret_cast<int32_t *>(
- // The coordinates of NETensor are different from the coordiantes of CLTensor in
- // this operand.
- // NEON : {j, reversed i}
- // CL : {reversed i, j}
- tensor.buffer() + tensor.calcOffset({j, shape.dim(0) - i - 1}));
+ // The coordinates of NETensor are different from the coordinates of CLTensor in
+ // this operand.
+ // NEON : {j, reversed i}
+ // CL : {reversed i, j}
+ tensor.buffer() + tensor.calcOffset({j, shape.dim(0) - i - 1}));
*into = value;
}
}
using ::onert::backend::acl_common::asAclFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
- ::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>;
+ ::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>;
KernelGenerator::KernelGenerator(
- const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
- : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN)
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
+ : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()),
+ _operations_ctx(graph.operations()), _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg(tensor_reg)
{
// DO NOTHING
}
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- // TODO Move this to IKernelGenerator
- // (all derivatives have the same implementation for this)
- assert(!_return_fn_seq);
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
- _return_fn_seq->enableDynamicShapeInferer(false);
-
- _current_layout = op_seq.getLayout();
- for (const auto &operation_idx : op_seq.operations())
- {
- const auto &node = _operations_ctx.at(operation_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
- }
+ auto ret = std::make_unique<exec::FunctionSequence>();
+ ret->enableDynamicShapeInferer(false);
+
+ const auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ ret->append(releaseFunction());
+ return ret;
}
void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
}
assert(axis_value >= 0 && axis_value < ifm_rank);
const auto fixed_axis =
- acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
: ::arm_compute::ReductionOperation::ARG_IDX_MIN;
auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>(
- ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), reduce_type);
+ ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), reduce_type);
_return_fn = asAclFunction(std::move(fn));
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
const auto NNApiInputs = 2;
if (node.getInputs().size() != NNApiInputs)
assert(_ctx.at(block_size_index).data());
auto fn = acl_common::generateLayer<arm_compute::NEBatchToSpaceLayer>(
- ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
+ ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
{
fn = acl_common::generateLayer<arm_compute::NEArithmeticAddition>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
{
fn = acl_common::generateLayer<arm_compute::NEArithmeticSubtraction>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
{
// RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO
fn = acl_common::generateLayer<arm_compute::NEPixelWiseMultiplication>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
{
fn = acl_common::generateLayer<arm_compute::NEElementwiseDivision>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
break;
}
default:
break;
}
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_width = ker_shape.dim(2);
const auto stride = node.param().stride;
- const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
- ker_width, ker_height);
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
const auto activation = node.param().activation;
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
const auto act_info = acl_common::asActivationLayerInfo(activation);
auto fn = acl_common::generateLayer<arm_compute::NEConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
- ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
- ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclFunction(std::move(fn));
}
auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>(
- input_tensor->handle(), output_tensor->handle(), block_size);
+ input_tensor->handle(), output_tensor->handle(), block_size);
_return_fn = asAclFunction(std::move(fn));
}
const auto stride = node.param().stride;
const auto dilation = node.param().dilation;
const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width,
- ker_height, dilation.width_factor, dilation.height_factor);
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
const auto dilation_info = acl_common::asDilation(dilation.width_factor, dilation.height_factor);
auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>(
- ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
- conv_info, multiplier, act_info, dilation_info);
+ ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+ conv_info, multiplier, act_info, dilation_info);
_return_fn = asAclFunction(std::move(fn));
}
}
auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
- std::vector<::arm_compute::ITensor *> input_tensors;
+ std::vector<const ::arm_compute::ITensor *> input_tensors;
for (const auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
if (input_indexes.size() < 2)
{
- fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensors.at(0),
- output_tensor->handle());
+ ::arm_compute::ITensor *input_tensor = _tensor_reg->getAclTensor(input_indexes.at(0))->handle();
+ fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor, output_tensor->handle());
}
else
{
const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
- acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
fn = acl_common::generateLayer<arm_compute::NEConcatenateLayer>(
- input_tensors, output_tensor->handle(), fixed_axis);
+ input_tensors, output_tensor->handle(), fixed_axis);
}
_return_fn = asAclFunction(std::move(fn));
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
- const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
- node.param().op_type, node.param().alpha, node.param().beta);
+ const ::arm_compute::ActivationLayerInfo act_info =
+ acl_common::asActivationLayerInfo(node.param().op_type, node.param().alpha, node.param().beta);
std::unique_ptr<arm_compute::IFunction> fn =
- acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(),
- ofm_tensor->handle(), act_info);
+ acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(),
+ ofm_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
{
fn = acl_common::generateLayer<arm_compute::NELogicalAnd>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
{
fn = acl_common::generateLayer<arm_compute::NELogicalOr>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
{
fn = acl_common::generateLayer<arm_compute::NEElementwiseMax>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
{
fn = acl_common::generateLayer<arm_compute::NEElementwiseMin>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
default:
case ir::operation::ElementwiseUnary::Type::ABS:
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
- input_tensor->handle(), output_tensor->handle(), act_info);
+ input_tensor->handle(), output_tensor->handle(), act_info);
break;
}
case ir::operation::ElementwiseUnary::Type::CAST:
else
{
fn = acl_common::generateLayer<arm_compute::NECast>(
- input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+ input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
}
break;
}
case ir::operation::ElementwiseUnary::Type::SQRT:
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
- input_tensor->handle(), output_tensor->handle(), act_info);
+ input_tensor->handle(), output_tensor->handle(), act_info);
break;
}
default:
auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::NEEmbeddingLookup>(
- values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+ values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
const auto activation = node.param().activation;
if (node.param().weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32)
throw std::runtime_error(
- "KernelGenerator(acl_neon): FullyConnected 16x1Float32 weights is not supported.");
+ "KernelGenerator(acl_neon): FullyConnected 16x1Float32 weights is not supported.");
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
::arm_compute::NEFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
- std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
+ std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>(
- lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
- output_tensor->handle(), hits_tensor->handle());
+ lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
}
auto fn = acl_common::generateLayer<arm_compute::NEGatherEx>(
- ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+ ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// Revert disabling applied dim_correction
if (ifm_tensor->dimension(0) == 1)
auto activation = node.param().activation;
auto fn = acl_common::generateLayer<arm_compute::NEInstanceNormalizationLayerEx>(
- ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
- epsilon);
+ ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+ epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::L2Normalization &node)
// TODO Support optional constant dimension that normalization would be performed on
const auto normalization_axis = _ctx.at(ifm_index).shape().rank() - 1;
int32_t radius =
- 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
- float alpha = 1.0f; // In the implementation to make alpha_ become 1
- float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
- float bias = 0.0f; // Don't offset the reduction.
+ 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
+  float alpha = 1.0f; // Set to 1.0f so that alpha_ in the implementation becomes 1
+ float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
+ float bias = 0.0f; // Don't offset the reduction.
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
radius, alpha, beta, bias, false);
auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
_return_fn = asAclFunction(std::move(fn));
}
{
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{
- node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
+ node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
auto radius = node.param().radius;
auto alpha = node.param().alpha;
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
- ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
+ ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
_return_fn = asAclFunction(std::move(fn));
}
const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
const auto axis =
- acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
}
UNUSED_RELEASE(input_type);
assert(input->info()->data_type() == acl_common::asDataType(input_type.type()));
assert(input->info()->quantization_info() ==
- ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset()));
+ ::arm_compute::QuantizationInfo(input_type.scale(), input_type.zero_point()));
const auto pixel_value =
- ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
+ ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
auto fn =
- acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
+ acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
_return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
+ node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
const auto ofm_index{node.getOutputs().at(0)};
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
const auto activation = node.param().activation;
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Permute &node)
auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index);
auto fn = acl_common::generateLayer<arm_compute::NEPReluLayer>(
- ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
+ ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
const auto frontend_layout = _current_layout;
const auto backend_layout = input_tensor->layout();
const auto reduce_axes =
- acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
+ acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
const auto reduce_type = node.param().reduce_type;
const auto keep_dims = node.param().keep_dims;
else
{
fn = acl_common::generateLayer<arm_compute::NEReduceOperation>(
- input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
- acl_common::convertReduceType(reduce_type));
+ input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
+ acl_common::convertReduceType(reduce_type));
}
_return_fn = asAclFunction(std::move(fn));
}
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
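+  // NOTE The scale options below are packed into a single ScaleKernelInfo (padding disabled)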
auto fn = acl_common::generateLayer<arm_compute::NEScale>(
- ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
- ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
- ::arm_compute::SamplingPolicy::TOP_LEFT);
+ ifm_tensor->handle(), ofm_tensor->handle(),
+ ::arm_compute::ScaleKernelInfo{::arm_compute::InterpolationPolicy::BILINEAR,
+ ::arm_compute::BorderMode::REPLICATE,
+ ::arm_compute::PixelValue(0.f),
+ ::arm_compute::SamplingPolicy::TOP_LEFT, false /*use padding*/});
_return_fn = asAclFunction(std::move(fn));
}
{
const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
+ node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
+ node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
- hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+ hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclFunction(std::move(copy_layer));
auto fn = acl_common::generateLayer<arm_compute::NERNNLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
- hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
auto output_tensor = _tensor_reg->getAclTensor(output_index);
auto input_tensor = _tensor_reg->getAclTensor(input_index);
- // Disable applied dim_correction
- if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
- {
- // This means that high dimension's value is 1 and input tensor is applied dim_correction
- acl_common::disableDimCorrection(input_tensor);
- }
-
+ // NOTE NESoftmaxLayer's default axis is -1
auto fn = acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- output_tensor->handle(), beta);
-
- // Revert disabling applied dim_correction
- if (input_tensor->dimension(0) == 1)
- {
- acl_common::disableDimCorrection(input_tensor);
- }
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), beta);
_return_fn = asAclFunction(std::move(fn));
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
assert(_ctx.at(paddings_index).data());
auto fn = acl_common::generateLayer<arm_compute::NESpaceToBatchLayer>(
- ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
- ofm_tensor->handle());
+ ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+ ifm_tensor->handle(), ofm_tensor->handle(), block_size);
_return_fn = asAclFunction(std::move(fn));
}
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
auto fn =
- acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);
+ acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclFunction(std::move(fn));
}
auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
{
auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
backend_layout)
- .value();
+ .value();
int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
starts[axis] = begin_value;
}
auto fn = acl_common::generateLayer<arm_compute::NESlice>(
- inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
_return_fn = asAclFunction(std::move(fn));
}
{
auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
backend_layout)
- .value();
+ .value();
int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
starts[axis] = start_value;
const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank);
const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank);
const auto shrink_axis_mask =
- acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
+ acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
::arm_compute::Coordinates starts_set;
::arm_compute::Coordinates ends_set;
}
// Disable applied dim_correction
- if (inputData_tensor->num_dimensions() != inputData_tensor->info()->num_dimensions())
+ if (static_cast<size_t>(inputData_tensor->getShape().rank()) !=
+ inputData_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and input tensor is applied dim_correction
acl_common::disableDimCorrection(inputData_tensor);
}
auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
- inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
- begin_mask, end_mask, shrink_axis_mask);
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+ begin_mask, end_mask, shrink_axis_mask);
// Revert disabling applied dim_correction
- if (inputData_tensor->dimension(0) == 1)
+ if (inputData_tensor->getShape().dim(0) == 1)
{
acl_common::enableDimCorrection(inputData_tensor);
}
if (node.param().padding.type == ir::PaddingType::VALID)
{
invalid_horizontal =
- ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
+ ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
auto fn = acl_common::generateLayer<arm_compute::NETransposeConvLayer>(
- ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
- invalid_horizontal, invalid_vertical);
+ ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
+ invalid_horizontal, invalid_vertical);
_return_fn = asAclFunction(std::move(fn));
}
else
{
auto backend_pv =
- acl_common::getARMComputePermutationVector(rank, pv, frontend_layout, backend_layout);
+ acl_common::getARMComputePermutationVector(rank, pv, frontend_layout, backend_layout);
fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
ofm_tensor->handle(), backend_pv);
axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
// Disable applied dim_correction
- if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
+ if (static_cast<size_t>(input_tensor->getShape().rank()) !=
+ input_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and input tensor is applied dim_correction
acl_common::disableDimCorrection(input_tensor);
}
auto fn =
- acl_common::generateLayer<arm_compute::NEUnstack>(input_tensor->handle(), outputs, axis);
+ acl_common::generateLayer<arm_compute::NEUnstack>(input_tensor->handle(), outputs, axis);
// Revert disabling applied dim_correction
- if (input_tensor->dimension(0) == 1)
+ if (input_tensor->getShape().dim(0) == 1)
{
acl_common::enableDimCorrection(input_tensor);
}
auto input1_tensor = _tensor_reg->getAclTensor(input1_index);
auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
- input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
+ input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
_return_fn = asAclFunction(std::move(fn));
}
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
auto fn = acl_common::generateLayer<arm_compute::NEOneHot>(
- indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
- offvalue_tensor->handle(), output_tensor->handle(), axis);
+ indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
+ offvalue_tensor->handle(), output_tensor->handle(), axis);
_return_fn = asAclFunction(std::move(fn));
}
#ifndef __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
-#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <backend/basic/KernelGeneratorBase.h>
#include "ir/Operands.h"
#include "TensorBuilder.h"
namespace acl_neon
{
-class KernelGenerator : public cpu_common::KernelGeneratorBase
+class KernelGenerator : public basic::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
- void visit(const ir::OpSequence &) override;
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+private:
void visit(const ir::operation::ArgMinMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BinaryArithmetic &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
+ const ir::Layout _current_layout;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
- ir::Layout _current_layout;
};
} // namespace acl_neon
{
Optimizer::Optimizer(BackendContext *context)
- : _context{context},
- _tensor_builder{std::dynamic_pointer_cast<TensorBuilder>(context->tensor_builder)}
+ : _context{context}, _tensor_builder{
+ std::dynamic_pointer_cast<TensorBuilder>(context->tensor_builder)}
{
assert(context);
}
// Concat elimination (build subtensor info)
{
acl_common::AclSubTensorAnalyzer sa{*_context->graph()};
- for (auto op_info : _context->operation_list())
- {
- auto &op = _context->graph()->operations().at(op_info.index);
- sa.setLayout(op_info.layout);
- op.accept(sa);
- }
-
- _tensor_builder->parent_map(sa.releaseParentMap());
+ sa.setUsePadding();
+ _context->graph()->operations().iterate(
+ [&](const ir::OperationIndex &, const ir::Operation &op) {
+ sa.setLayout(_context->graph()->layout());
+ op.accept(sa);
+ });
}
}
{
using TensorBuilder =
- acl_common::AclTensorBuilder<operand::INETensor, operand::NETensor, operand::NESubTensor>;
+ acl_common::AclTensorBuilder<operand::INETensor, operand::NETensor, operand::NESubTensor>;
} // namespace acl_neon
} // namespace backend
{
using MemoryManager =
- acl_common::AclMemoryManager<operand::INETensor, operand::NETensor, operand::NESubTensor>;
+ acl_common::AclMemoryManager<operand::INETensor, operand::NETensor, operand::NESubTensor>;
using LinearMemoryManager = acl_common::AclLinearMemoryManager<
- operand::INETensor, operand::NETensor, operand::NESubTensor,
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator, ::arm_compute::MemoryGroup>;
+ operand::INETensor, operand::NETensor, operand::NESubTensor, ::arm_compute::MemoryManagerOnDemand,
+ ::arm_compute::PoolManager, ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator,
+ ::arm_compute::MemoryGroup>;
using InternalBufferManager = acl_common::AclInternalBufferManager<
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator>;
+ ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
+ ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator>;
using TensorManager = acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor,
operand::NESubTensor>;
class INETensor : public acl_common::IACLTensor
{
public:
+ INETensor(size_t rank) : IACLTensor{rank} {}
const arm_compute::ITensor *handle() const override = 0;
arm_compute::ITensor *handle() override = 0;
void access(const std::function<void(ITensor &tensor)> &fn) final;
NESubTensor::NESubTensor(INETensor *parent, const arm_compute::TensorShape &tensor_shape,
const arm_compute::Coordinates &coords, size_t rank, bool extend_parent)
- : _ne_sub_tensor(std::make_shared<arm_compute::SubTensor>(parent->handle(), tensor_shape,
- coords, extend_parent)),
- _rank{rank}
+ : INETensor{rank}, _ne_sub_tensor(std::make_shared<arm_compute::SubTensor>(
+ parent->handle(), tensor_shape, coords, extend_parent))
{
// DO NOTHING
}
NESubTensor(INETensor *parent, const arm_compute::TensorShape &tensor_shape,
const arm_compute::Coordinates &coords, size_t rank, bool extend_parent = false);
-public:
- size_t num_dimensions() const final { return _rank; }
-
public:
const arm_compute::SubTensor *handle() const override;
arm_compute::SubTensor *handle() override;
private:
std::shared_ptr<arm_compute::SubTensor> _ne_sub_tensor;
- size_t _rank;
};
} // namespace operand
{
NETensor::NETensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses)
- : _ne_tensor(std::make_shared<arm_compute::Tensor>()), _rank{rank}, _num_uses{num_uses}
+ : INETensor{rank}, _ne_tensor(std::make_shared<arm_compute::Tensor>()), _num_uses{num_uses}
{
allocator()->init(info);
}
public:
NETensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses);
-public:
- size_t num_dimensions() const final { return _rank; }
-
public:
const arm_compute::Tensor *handle() const override;
arm_compute::Tensor *handle() override;
private:
std::shared_ptr<arm_compute::Tensor> _ne_tensor;
- size_t _rank;
size_t _num_uses;
};
#include "BackendContext.h"
#include "Config.h"
-#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include <backend/Backend.h>
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<onert::backend::BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
- bool) const override
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
{
- const auto &operands = graph.operands();
- const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
- auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto custom_kernel_builder = data.custom_kernel_builder;
+ auto &graph = *data.graph;
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
+ auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
context->external_context());
return context;
}
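The context creation above now receives a single ContextData bundle instead of separate graph/kernel-builder arguments. Reconstructed only from the members this change actually uses (data.graph and data.custom_kernel_builder here, _data.op_order in genKernels() below), a hedged sketch of the bundle looks roughly like:

    // Hedged sketch of ContextData, inferred from the usages in this change only.
    // The pointer kinds are assumptions and the real struct likely carries more members.
    struct ContextData
    {
      std::unique_ptr<ir::Graph> graph;                              // dereferenced as *data.graph
      std::vector<ir::OperationIndex> op_order;                      // iterated by genKernels()
      std::shared_ptr<custom::IKernelBuilder> custom_kernel_builder; // copied before std::move(data)
    };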
#include "ir/Index.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandIndexSequence.h"
-#include "backend/cpu_common/BackendContextHelpers.h"
+#include "backend/basic/BackendContextHelpers.h"
namespace onert
{
namespace cpu
{
-void BackendContext::initConsts()
-{
- for (auto &op : operation_list())
- {
- constant_initializer->setLayout(op.layout);
- graph()->operations().at(op.index).accept(*constant_initializer);
- }
-
- for (auto ind : operand_list())
- {
- const auto &obj = graph()->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- }
-
- constant_initializer->run();
-}
-
-ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info)
-{
- auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
- ir::Remove::DUPLICATED;
- for (auto index : operand_list())
- {
- if (model_io.contains(index))
- continue;
- const auto &obj = graph()->operands().at(index);
- const auto frontend_layout = [&]() {
- if (obj.getUses().size() == 0)
- return ir::Layout::UNKNOWN;
- auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
- for (auto &operation_info : operation_list())
- {
- if (operation_info.index == use_op_ind)
- return operation_info.layout;
- }
- return ir::Layout::UNKNOWN;
- }();
- const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
- if (permute_factor.backend() != backend())
- continue;
- const auto backend_layout = permute_factor.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
-
- // TODO Get compiler options from compiler, and use it rather than getting it from Env
- if (util::getConfigString(util::config::EXECUTOR) == "Linear")
- {
- cpu_common::planTensors(*this, order, op_seqs, lower_info);
- }
- else
- {
- // For the executors that does not have fixed linear execution order:
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto ind : operand_list())
- {
- if (tensor_builder->isRegistered(ind))
- tensor_builder->notifyFirstUse(ind);
- }
- }
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
- tensor_builder->prepare();
-
- return tensor_registry.get();
-}
-
-FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs)
+FunctionMap BackendContext::genKernels()
{
FunctionMap ret;
- for (auto op_seq_ind : order)
+ for (auto op_ind : _data.op_order)
{
- const auto &op_seq = op_seqs.at(op_seq_ind);
- bool assigned = [&]() {
- for (auto op_info : operation_list())
- if (op_seq.exist(op_info.index))
- return true;
- return false;
- }();
- if (!assigned)
- continue;
- auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
- ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
}
- initConsts();
+ basic::initConsts(*this);
// NOTE For memory optimization, we want to free some operand data
- for (auto ind : operand_list())
- {
- // TODO Remove const_cast
- auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
- obj.releaseData();
- }
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
for (auto &it : ret)
{
#include <backend/BackendContext.h>
#include "TensorBuilder.h"
-#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include "ExternalContext.h"
class BackendContext : public onert::backend::BackendContext
{
public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
+ BackendContext(const Backend *backend, ContextData &&data,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}, _external_context(new ExternalContext)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, _external_context(new ExternalContext)
{
}
- ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info) override;
- FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs) override;
+ ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
std::shared_ptr<ExternalContext> external_context() { return _external_context; }
-private:
- void initConsts();
- void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
-
public:
// TODO Make it private
std::shared_ptr<TensorBuilder> tensor_builder;
- std::shared_ptr<ConstantInitializer> constant_initializer;
std::shared_ptr<KernelGenerator> kernel_gen;
private:
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
-#define __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
-
-#include <backend/cpu_common/ConstantInitializer.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-using ConstantInitializer = cpu_common::ConstantInitializer;
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
#include <util/ConfigSource.h>
#include <ruy/context.h>
-namespace
-{
-const int kDefaultNumThreadpoolThreads = 1;
-}
-
namespace onert
{
namespace backend
class ExternalContext
{
+private:
+ static const int kDefaultNumThreadpoolThreads = 1;
+
public:
ExternalContext() : _ruy_context(new ruy::Context)
{
void setMaxNumThreads(int max_num_threads)
{
const int target_num_threads =
- max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
_ruy_context->set_max_num_threads(target_num_threads);
}
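setMaxNumThreads() treats any value above -1 as an explicit thread count and otherwise falls back to the single-thread default now kept inside the class. A small usage sketch (class and construction taken from this file; the enclosing onert::backend::cpu namespace is assumed):

    // Usage sketch for the ruy-backed ExternalContext shown above.
    onert::backend::cpu::ExternalContext ctx;
    ctx.setMaxNumThreads(4);  // ruy thread pool capped at 4 threads
    ctx.setMaxNumThreads(-1); // falls back to kDefaultNumThreadpoolThreads (= 1)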
#include "ops/PadLayer.h"
#include "ops/PoolLayer.h"
#include "ops/PowLayer.h"
+#include "ops/QuantizeLayer.h"
#include "ops/RangeLayer.h"
#include "ops/RankLayer.h"
#include "ops/ReduceLayer.h"
} // namespace
KernelGenerator::KernelGenerator(
- const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
- const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
- const std::shared_ptr<ExternalContext> &external_context)
- : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
- _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
- _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : basic::KernelGeneratorBase{graph},
+ _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _external_context(external_context)
{
// DO NOTHING
}
-void KernelGenerator::visit(const ir::operation::AddN &node)
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- const auto output_index{node.getOutputs().at(0)};
-
- std::vector<const IPortableTensor *> input_tensors;
- for (auto &input_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));
-
- auto output_tensor = _tensor_reg->getPortableTensor(output_index);
-
- auto fn = std::make_unique<ops::AddNLayer>();
-
- fn->configure(std::move(input_tensors), output_tensor);
-
- _return_fn = std::move(fn);
-}
+ auto ret = std::make_unique<exec::FunctionSequence>();
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
-{
- assert(!_return_fn_seq);
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_seq = &op_seq;
+ dyn_ctx->op_ind = ind;
dyn_ctx->operations = &_operations_ctx;
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
- _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+ ret->dynamic_tensor_ctx(dyn_ctx);
}
- _current_layout = op_seq.getLayout();
- for (const auto &operation_idx : op_seq.operations())
+ auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ assert(_return_fn); // _return_fn must have been generated
+ ret->append(std::move(_return_fn));
+
+ for (auto ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
{
- const auto &node = _operations_ctx.at(operation_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
- for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
{
- auto portable_tensor = _tensor_reg->getPortableTensor(ind);
- if (portable_tensor)
- {
- assert(portable_tensor->layout() == ir::Layout::NHWC);
- }
-
- auto tensor = _tensor_reg->getNativeTensor(ind);
- if (tensor)
- {
- tensor->increase_ref();
- }
+ tensor->increase_ref();
}
}
+ return ret;
+}
+
+void KernelGenerator::visit(const ir::operation::AddN &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+
+ std::vector<const IPortableTensor *> input_tensors;
+ for (auto &input_idx : node.getInputs())
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));
+
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+
+ auto fn = std::make_unique<ops::AddNLayer>();
+
+ fn->configure(std::move(input_tensors), output_tensor);
+
+ _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_width = ker_shape.dim(2);
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
- dilation.width_factor, dilation.height_factor);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);
assert(indices_tensor->data_type() == OperandType::INT32);
- assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
+ assert(axis <= static_cast<int>(indices_tensor->getShape().rank()));
auto fn = std::make_unique<ops::OneHotLayer>();
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto input_tensor = _tensor_reg->getPortableTensor(input_index);
- auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
-
- fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
-
- _return_fn = std::move(fn);
+ if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
+ {
+ auto fn = std::make_unique<ops::QuantizeLayer>();
+ fn->configure(input_tensor, output_tensor);
+ _return_fn = std::move(fn);
+ }
+ else
+ {
+ auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
+ fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
+ _return_fn = std::move(fn);
+ }
}
void KernelGenerator::visit(const ir::operation::ExpandDims &node)
const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
const auto activation = node.param().activation;
auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
const auto time_major = node.param().time_major;
(_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
_ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
bool has_recurrent_to_input_weights =
- _ctx.exist(recurrent_to_input_weights_index) &&
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
+ _ctx.exist(recurrent_to_input_weights_index) &&
+ (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
// NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
// But the cell_to_input_weights does not exist in regular CIFG although peephole.
_ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
bool has_input_gate_bias =
- _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0);
+ _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0);
bool has_projection_weights = _ctx.exist(projection_weights_index) &&
(_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
_ctx.at(projection_weights_index).shape().dim(1) != 0);
bool has_projection_bias =
- _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0);
+ _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0);
auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
- ? _tensor_reg->getPortableTensor(scratch_buffer_index)
- : nullptr; // optional
+ ? _tensor_reg->getPortableTensor(scratch_buffer_index)
+ : nullptr; // optional
auto output_state_out_tensor = _ctx.exist(output_state_out_index)
- ? _tensor_reg->getPortableTensor(output_state_out_index)
- : nullptr; // optional
- auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
- ? _tensor_reg->getPortableTensor(cell_state_out_index)
+ ? _tensor_reg->getPortableTensor(output_state_out_index)
: nullptr; // optional
+ auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
+ ? _tensor_reg->getPortableTensor(cell_state_out_index)
+ : nullptr; // optional
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto input_to_input_weights_tensor =
- has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
- : nullptr; // optional
+ has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
+ : nullptr; // optional
auto input_to_forget_weights_tensor =
- _tensor_reg->getPortableTensor(input_to_forget_weights_index);
+ _tensor_reg->getPortableTensor(input_to_forget_weights_index);
auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
auto input_to_output_weights_tensor =
- _tensor_reg->getPortableTensor(input_to_output_weights_index);
+ _tensor_reg->getPortableTensor(input_to_output_weights_index);
auto recurrent_to_input_weights_tensor =
- has_recurrent_to_input_weights
- ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
- : nullptr; // optional
+ has_recurrent_to_input_weights
+ ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
+ : nullptr; // optional
auto recurrent_to_forget_weights_tensor =
- _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
+ _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
auto recurrent_to_cell_weights_tensor =
- _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
+ _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
auto recurrent_to_output_weights_tensor =
- _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);
+ _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);
auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
auto cell_to_forget_weights_tensor =
- has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
- : nullptr; // optional
+ has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
+ : nullptr; // optional
auto cell_to_output_weights_tensor =
- has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
- : nullptr; // optional
+ has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
+ : nullptr; // optional
auto input_gate_bias_tensor =
- has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
+ has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);
auto projection_weights_tensor = has_projection_weights
- ? _tensor_reg->getPortableTensor(projection_weights_index)
- : nullptr; // optional
+ ? _tensor_reg->getPortableTensor(projection_weights_index)
+ : nullptr; // optional
auto projection_bias_tensor = has_projection_bias
- ? _tensor_reg->getPortableTensor(projection_bias_index)
- : nullptr; // optional
+ ? _tensor_reg->getPortableTensor(projection_bias_index)
+ : nullptr; // optional
IPortableTensor *input_layer_norm_weights_tensor = nullptr;
IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
if (node.getInputs().size() == 24)
{
const auto input_layer_norm_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
const auto forget_layer_norm_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
const auto cell_layer_norm_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
const auto output_layer_norm_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};
input_layer_norm_weights_tensor =
- _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
+ _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
forget_layer_norm_weights_tensor =
- _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
+ _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
output_layer_norm_weights_tensor =
- _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
+ _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
}
auto fn = std::make_unique<ops::LSTMLayer>();
fn->configure(
- input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
- input_to_cell_weights_tensor, input_to_output_weights_tensor,
- recurrent_to_input_weights_tensor, recurrent_to_forget_weights_tensor,
- recurrent_to_cell_weights_tensor, recurrent_to_output_weights_tensor,
- cell_to_input_weights_tensor, cell_to_forget_weights_tensor, cell_to_output_weights_tensor,
- input_layer_norm_weights_tensor, forget_layer_norm_weights_tensor,
- cell_layer_norm_weights_tensor, output_layer_norm_weights_tensor,
- /*aux_input=*/nullptr,
- /*aux_input_to_input_weights=*/nullptr,
- /*aux_input_to_forget_weights=*/nullptr,
- /*aux_input_to_cell_weights=*/nullptr,
- /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
- cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
- projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
- /*forward_sequence=*/true, time_major,
- /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
- output_tensor,
- !_ctx.at(output_state_in_index).info().isVariable() /* means empty buffer on frontend now */,
- !_ctx.at(cell_state_in_index).info().isVariable());
+ input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
+ input_to_cell_weights_tensor, input_to_output_weights_tensor, recurrent_to_input_weights_tensor,
+ recurrent_to_forget_weights_tensor, recurrent_to_cell_weights_tensor,
+ recurrent_to_output_weights_tensor, cell_to_input_weights_tensor, cell_to_forget_weights_tensor,
+ cell_to_output_weights_tensor, input_layer_norm_weights_tensor,
+ forget_layer_norm_weights_tensor, cell_layer_norm_weights_tensor,
+ output_layer_norm_weights_tensor,
+ /*aux_input=*/nullptr,
+ /*aux_input_to_input_weights=*/nullptr,
+ /*aux_input_to_forget_weights=*/nullptr,
+ /*aux_input_to_cell_weights=*/nullptr,
+ /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
+ cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
+ projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
+ /*forward_sequence=*/true, time_major,
+ /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
+ output_tensor,
+ !_ctx.at(output_state_in_index).info().isVariable() /* means empty buffer on frontend now */,
+ !_ctx.at(cell_state_in_index).info().isVariable());
_return_fn = std::move(fn);
}
#include "ExternalContext.h"
#include "TensorBuilder.h"
-#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/basic/TensorRegistry.h"
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
-#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <backend/basic/KernelGeneratorBase.h>
#include <ir/Operands.h>
#include <ir/Operations.h>
namespace cpu
{
-class KernelGenerator : public cpu_common::KernelGeneratorBase
+class KernelGenerator : public basic::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
- void visit(const ir::OpSequence &) override;
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex op_ind) override;
void visit(const ir::operation::AddN &) override;
void visit(const ir::operation::ArgMinMax &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
+ ir::Layout _current_layout;
std::shared_ptr<TensorBuilder> _tensor_builder;
- std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::shared_ptr<basic::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
- ir::Layout _current_layout;
const std::shared_ptr<ExternalContext> _external_context;
};
#ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
-#include "backend/cpu_common/StaticTensorManager.h"
+#include "backend/basic/StaticTensorManager.h"
namespace onert
{
namespace cpu
{
-using StaticTensorManager = cpu_common::StaticTensorManager;
+using StaticTensorManager = basic::StaticTensorManager;
} // namespace cpu
} // namespace backend
#ifndef __ONERT_BACKEND_CPU_TENSOR_H__
#define __ONERT_BACKEND_CPU_TENSOR_H__
-#include <backend/cpu_common/Tensor.h>
+#include <backend/basic/Tensor.h>
#include <ir/Data.h>
namespace onert
namespace cpu
{
-using Tensor = cpu_common::Tensor;
-using ExternalTensor = cpu_common::ExternalTensor;
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
} // namespace cpu
} // namespace backend
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorBuilder.h"
-
-#include <util/logging.h>
-
-#include <cassert>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
- : _tensor_reg{tensor_reg},
- _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
- _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
-{
- /* empty */
-}
-
-void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout layout)
-{
- _tensor_info_map.emplace(ind, info);
-
- // CPU backend supports only one layout as NHWC
- assert(layout == ir::Layout::NHWC);
- if (info.isDynamic())
- {
- _dynamic_tensor_mgr->buildTensor(ind, info, layout);
- }
- else
- {
- _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
- }
-}
-
-void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
-{
- assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
- const auto tensor_info = _tensor_info_map.at(ind);
-
- if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
- {
- const auto size = tensor_info.total_size();
- _static_tensor_mgr->claimPlan(ind, size);
- }
-}
-
-void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
-{
- if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
- {
- _static_tensor_mgr->releasePlan(ind);
- }
-}
-
-bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
-{
- return _tensor_info_map.find(ind) != _tensor_info_map.end();
-}
-
-void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
-
-void TensorBuilder::allocate()
-{
- // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
- // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
#ifndef __ONERT_BACKEND_CPU_TENSOR_BUILDER_H__
#define __ONERT_BACKEND_CPU_TENSOR_BUILDER_H__
-#include <backend/cpu_common/DynamicTensorManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
-
-#include <ir/OperandIndexMap.h>
-
-#include "StaticTensorManager.h"
-#include "Tensor.h"
-
-#include <unordered_map>
+#include <backend/basic/TensorBuilder.h>
namespace onert
{
namespace cpu
{
-class TensorBuilder
-{
-public:
- TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
-
- /**
- * @brief Register tensor information to allocate on CPU backend
- * @param[in] ind Operand index
- * @param[in] info Operand information
- * @param[in] layout Operand data layout
- */
- void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout);
-
- void notifyFirstUse(const ir::OperandIndex &);
- void notifyLastUse(const ir::OperandIndex &);
-
- bool isRegistered(const ir::OperandIndex &) const;
-
- void prepare(void);
- void allocate();
- void postFunctionPrepare() { /* DO NOTHING */}
-
- IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
-
-private:
- const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
- std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
- std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
- ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
-};
+using TensorBuilder = basic::TensorBuilder;
} // namespace cpu
} // namespace backend
std::vector<const int32_t *> input_buffers(input_size);
for (size_t i = 0; i < input_size; i++)
{
- input_buffers[i] = reinterpret_cast<int32_t *>(_inputs[i]->buffer());
+ input_buffers[i] = getBuffer<int32_t>(_inputs[i]);
}
- AddN(getTensorShape(_inputs[0]), input_size, input_buffers.data(),
- reinterpret_cast<int32_t *>(_output->buffer()));
+ AddN(getShape(_inputs[0]), input_size, input_buffers.data(), getBuffer<int32_t>(_output));
}
else if (_output->data_type() == ir::DataType::FLOAT32)
{
std::vector<const float *> input_buffers(input_size);
for (size_t i = 0; i < input_size; i++)
{
- input_buffers[i] = reinterpret_cast<float *>(_inputs[i]->buffer());
+ input_buffers[i] = getBuffer<float>(_inputs[i]);
}
- AddN(getTensorShape(_inputs[0]), input_size, input_buffers.data(),
- reinterpret_cast<float *>(_output->buffer()));
+ AddN(getShape(_inputs[0]), input_size, input_buffers.data(), getBuffer<float>(_output));
}
else
{
return std::less<T>();
}
}
-}
+} // namespace
void ArgMinMaxLayer::configure(const IPortableTensor *input, IPortableTensor *output,
const IPortableTensor *axis, bool is_arg_max)
{
throw std::runtime_error("ArgMinMax: wrong shape of axis");
}
- auto axis = *reinterpret_cast<const int32_t *>(_axis->buffer());
+ auto axis = *getBuffer<int32_t>(_axis);
if (axis < 0)
{
- axis += _input->num_dimensions();
+ axis += _input->getShape().rank();
}
-#define TF_LITE_ARG_MIN_MAX(input_type, axis_type, output_type) \
- ArgMinMax(getTensorShape(_input), reinterpret_cast<const input_type *>(_input->buffer()), \
- getTensorShape(_output), reinterpret_cast<output_type *>(_output->buffer()), axis, \
- GetComparefunction<input_type>(_is_arg_max));
+#define TF_LITE_ARG_MIN_MAX(input_type, axis_type, output_type) \
+ ArgMinMax(getShape(_input), getBuffer<input_type>(_input), getShape(_output), \
+ getBuffer<output_type>(_output), axis, GetComparefunction<input_type>(_is_arg_max));
if (_output->data_type() == ir::DataType::INT32)
{
switch (_input->data_type())
{
BatchMatMulLayer::BatchMatMulLayer()
- : _lhs(nullptr), _rhs(nullptr), _output(nullptr), _adj_x(false), _adj_y(false),
- _kernel(new nnfw::cker::BatchMatMul())
+ : _lhs(nullptr), _rhs(nullptr), _output(nullptr), _adj_x(false), _adj_y(false),
+ _kernel(new nnfw::cker::BatchMatMul())
{
// DO NOTHING
}
void BatchMatMulLayer::batchMatMulFloat32()
{
nnfw::cker::BatchMatMul &batchmatmul_kernel = *_kernel;
- nnfw::cker::Shape lhs_shape = getTensorShape(_lhs);
- nnfw::cker::Shape rhs_shape = getTensorShape(_rhs);
- nnfw::cker::Shape output_shape = getTensorShape(_output);
+ nnfw::cker::Shape lhs_shape = getShape(_lhs);
+ nnfw::cker::Shape rhs_shape = getShape(_rhs);
+ nnfw::cker::Shape output_shape = getShape(_output);
// TODO implement for constant input
batchmatmul_kernel.prepare(lhs_shape, rhs_shape, _adj_x, _adj_y);
- batchmatmul_kernel(lhs_shape, reinterpret_cast<const float *>(_lhs->buffer()), rhs_shape,
- reinterpret_cast<const float *>(_rhs->buffer()), _adj_x, _adj_y, output_shape,
- reinterpret_cast<float *>(_output->buffer()));
+ batchmatmul_kernel(lhs_shape, getBuffer<float>(_lhs), rhs_shape, getBuffer<float>(_rhs), _adj_x,
+ _adj_y, output_shape, getBuffer<float>(_output));
}
void BatchMatMulLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, bool adj_x,
{
BatchToSpaceNDLayer::BatchToSpaceNDLayer()
- : _input(nullptr), _output(nullptr), _block_shape(nullptr), _crops(nullptr)
+ : _input(nullptr), _output(nullptr), _block_shape(nullptr), _crops(nullptr)
{
// DO NOTHING
}
}
else
{
- _crops_buffer = reinterpret_cast<const int32_t *>(_crops->buffer());
+ _crops_buffer = getBuffer<int32_t>(_crops);
}
- nnfw::cker::BatchToSpaceND<T>(
- getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
- reinterpret_cast<const int32_t *>(_block_shape->buffer()), _crops_buffer,
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::BatchToSpaceND<T>(getShape(_input), getBuffer<T>(_input),
+ getBuffer<int32_t>(_block_shape), _crops_buffer, getShape(_output),
+ getBuffer<T>(_output));
}
void BatchToSpaceNDLayer::configure(const IPortableTensor *input, IPortableTensor *output,
Eval(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
nnfw::cker::BinaryArithmeticOpParam op_params)
- : _op_params(std::move(op_params)), _need_broadcast(false)
+ : _op_params(std::move(op_params)), _need_broadcast(false)
{
if (!output->is_dynamic())
updateCache(lhs, rhs, output);
void updateCache(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
{
- _lhs_shape.ReplaceWith(getTensorShape(lhs));
- _rhs_shape.ReplaceWith(getTensorShape(rhs));
- _output_shape.ReplaceWith(getTensorShape(output));
+ _lhs_shape.ReplaceWith(getShape(lhs));
+ _rhs_shape.ReplaceWith(getShape(rhs));
+ _output_shape.ReplaceWith(getShape(output));
_need_broadcast = nnfw::cker::ProcessBroadcastShapes(_lhs_shape, _rhs_shape, &_op_params);
}
if (output->is_dynamic())
updateCache(lhs, rhs, output);
else
- assert(_lhs_shape == getTensorShape(lhs) && _rhs_shape == getTensorShape(rhs) &&
- _output_shape == getTensorShape(output));
- auto lhs_buffer = reinterpret_cast<const T *>(lhs->buffer());
- auto rhs_buffer = reinterpret_cast<const T *>(rhs->buffer());
- auto output_buffer = reinterpret_cast<T *>(output->buffer());
+ assert(_lhs_shape == getShape(lhs) && _rhs_shape == getShape(rhs) &&
+ _output_shape == getShape(output));
+ auto lhs_buffer = getBuffer<T>(lhs);
+ auto rhs_buffer = getBuffer<T>(rhs);
+ auto output_buffer = getBuffer<T>(output);
if (_need_broadcast)
{
nnfw::cker::BroadcastBinaryArithmeticOp<arithmetic_type>(
- _op_params, _lhs_shape, lhs_buffer, _rhs_shape, rhs_buffer, _output_shape, output_buffer);
+ _op_params, _lhs_shape, lhs_buffer, _rhs_shape, rhs_buffer, _output_shape, output_buffer);
}
else
{
nnfw::cker::BinaryArithmeticOp<arithmetic_type>(
- _op_params, _lhs_shape, lhs_buffer, _rhs_shape, rhs_buffer, _output_shape, output_buffer);
+ _op_params, _lhs_shape, lhs_buffer, _rhs_shape, rhs_buffer, _output_shape, output_buffer);
}
}
};
nnfw::cker::BinaryArithmeticOpParam *params)
{
int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+ CalculateActivationRangeQuantized(activation, output, &output_activation_min,
+ &output_activation_max);
nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
op_params.quantized_activation_max = output_activation_max;
op_params.quantized_activation_min = output_activation_min;
// Parameters for scaled quantized computation
op_params.left_shift = 20;
// Zero-points of input and output tensors
- op_params.input1_offset = -lhs->data_offset();
- op_params.input2_offset = -rhs->data_offset();
- op_params.output_offset = output->data_offset();
- assert((op_params.input1_offset <= 0) && (op_params.input1_offset >= -255));
- assert((op_params.input2_offset <= 0) && (op_params.input2_offset >= -255));
- assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
+ op_params.input1_offset = -lhs->data_zero_point();
+ op_params.input2_offset = -rhs->data_zero_point();
+ op_params.output_offset = output->data_zero_point();
// Compute normalized scale for _lhs and _rhs values,
// and represent in 32-bit fixed point
const double real_rhs_scale = rhs->data_scale() / norm_max_scale;
// output scale is used to normalize final result, so we invert the scale here
const double real_output_scale =
- norm_max_scale / (output->data_scale() * (1 << op_params.left_shift));
+ norm_max_scale / (output->data_scale() * (1 << op_params.left_shift));
// Represent the scales as fixed int32_t multipliers, and int32_t shifts
QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
nnfw::cker::BinaryArithmeticOpParam *params)
{
int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+ CalculateActivationRangeQuantized(activation, output, &output_activation_min,
+ &output_activation_max);
nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
op_params.quantized_activation_max = output_activation_max;
op_params.quantized_activation_min = output_activation_min;
- op_params.input1_offset = -lhs->data_offset();
- op_params.input2_offset = -rhs->data_offset();
- op_params.output_offset = output->data_offset();
+ op_params.input1_offset = -lhs->data_zero_point();
+ op_params.input2_offset = -rhs->data_zero_point();
+ op_params.output_offset = output->data_zero_point();
double real_multiplier = lhs->data_scale() * rhs->data_scale() / output->data_scale();
QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
{
setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
_kernel =
- Eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>(_lhs, _rhs, _output, op_params);
+ Eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>(_lhs, _rhs, _output, op_params);
}
+ else if (_lhs->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
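+ // Reuses the uint8 add/sub quantization parameters; only the Eval element type differs.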
+ setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ _kernel =
+ Eval<nnfw::cker::BinaryArithmeticOpType::ADD, int8_t>(_lhs, _rhs, _output, op_params);
+ }
else
{
_kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::ADD>(
- _lhs, _rhs, _output, activation, op_params);
+ _lhs, _rhs, _output, activation, op_params);
}
break;
case ArithmeticType::kSub:
setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
op_params.input2_multiplier *= -1;
_kernel =
- Eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>(_lhs, _rhs, _output, op_params);
+ Eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>(_lhs, _rhs, _output, op_params);
+ }
+ else if (_lhs->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ op_params.input2_multiplier *= -1;
+ _kernel =
+ Eval<nnfw::cker::BinaryArithmeticOpType::SUB, int8_t>(_lhs, _rhs, _output, op_params);
}
else
{
_kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::SUB>(
- _lhs, _rhs, _output, activation, op_params);
+ _lhs, _rhs, _output, activation, op_params);
}
break;
case ArithmeticType::kMul:
nnfw::cker::BinaryArithmeticOpParam op_params;
setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params);
_kernel =
- Eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>(_lhs, _rhs, _output, op_params);
+ Eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>(_lhs, _rhs, _output, op_params);
+ }
+ else if (_lhs->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ _kernel =
+ Eval<nnfw::cker::BinaryArithmeticOpType::MUL, int8_t>(_lhs, _rhs, _output, op_params);
}
else
{
_kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::MUL>(
- _lhs, _rhs, _output, activation, op_params);
+ _lhs, _rhs, _output, activation, op_params);
}
break;
case ArithmeticType::kDiv:
if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
throw std::runtime_error{
- "BinaryArithmetic(Div): Div operation does not support quantization"};
+ "BinaryArithmetic(Div): Div operation does not support quantization"};
}
else if (_lhs->data_type() == OperandType::INT32)
{
else
{
_kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::DIV>(
- _lhs, _rhs, _output, activation, op_params);
+ _lhs, _rhs, _output, activation, op_params);
}
break;
default:
{
// ToDo : It need to support INT8 and UINT8 also when will be applied quantization.
case OperandType::FLOAT32:
- nnfw::cker::BroadcastTo<float>(
- getTensorShape(_input), reinterpret_cast<float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::BroadcastTo<float>(getShape(_input), reinterpret_cast<float *>(_input->buffer()),
+ getShape(_output), getBuffer<float>(_output));
break;
case OperandType::INT32:
- nnfw::cker::BroadcastTo<int32_t>(
- getTensorShape(_input), reinterpret_cast<int32_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
+ nnfw::cker::BroadcastTo<int32_t>(getShape(_input),
+ reinterpret_cast<int32_t *>(_input->buffer()),
+ getShape(_output), getBuffer<int32_t>(_output));
break;
case OperandType::UINT32:
- nnfw::cker::BroadcastTo<uint32_t>(
- getTensorShape(_input), reinterpret_cast<uint32_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint32_t *>(_output->buffer()));
+ nnfw::cker::BroadcastTo<uint32_t>(getShape(_input),
+ reinterpret_cast<uint32_t *>(_input->buffer()),
+ getShape(_output), getBuffer<uint32_t>(_output));
break;
default:
throw std::runtime_error{"BroadcastToLayer: unsupported data type"};
{
nnfw::cker::ComparisonParams params;
params.left_shift = 8;
- params.input1_offset = -lhs->data_offset();
- params.input2_offset = -rhs->data_offset();
+ params.input1_offset = -lhs->data_zero_point();
+ params.input2_offset = -rhs->data_zero_point();
const double norm_max_scale =
- 2 * std::max(std::abs(lhs->data_scale()), std::abs(rhs->data_scale()));
+ 2 * std::max(std::abs(lhs->data_scale()), std::abs(rhs->data_scale()));
const double adjusted_lhs_scale = lhs->data_scale() / norm_max_scale;
const double adjusted_rhs_scale = rhs->data_scale() / norm_max_scale;
QuantizeMultiplierSmallerThanOneExp(adjusted_lhs_scale, ¶ms.input1_multiplier,
¶ms.input2_shift);
params.is_broadcast = !HaveSameShapes(lhs, rhs);
- using CompareFunction =
- void (*)(ComparisonParams & params, const Shape &input1_shape, const T *input1_data,
- const Shape &input2_shape, const T *input2_data, const Shape &output_shape,
- bool *output_data);
+ using CompareFunction = void (*)(
+ ComparisonParams & params, const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, bool *output_data);
static const CompareFunction broadcast_fns[] = {
- Broadcast4DSlowEqualWithScaling, Broadcast4DSlowNotEqualWithScaling,
- Broadcast4DSlowGreaterWithScaling, Broadcast4DSlowGreaterEqualWithScaling,
- Broadcast4DSlowLessWithScaling, Broadcast4DSlowLessEqualWithScaling,
+ Broadcast4DSlowEqualWithScaling, Broadcast4DSlowNotEqualWithScaling,
+ Broadcast4DSlowGreaterWithScaling, Broadcast4DSlowGreaterEqualWithScaling,
+ Broadcast4DSlowLessWithScaling, Broadcast4DSlowLessEqualWithScaling,
};
static const CompareFunction non_broadcast_fns[] = {
- EqualWithScaling, NotEqualWithScaling, GreaterWithScaling,
- GreaterEqualWithScaling, LessWithScaling, LessEqualWithScaling,
+ EqualWithScaling, NotEqualWithScaling, GreaterWithScaling,
+ GreaterEqualWithScaling, LessWithScaling, LessEqualWithScaling,
};
static_assert(sizeof(broadcast_fns) == sizeof(non_broadcast_fns),
CompareFunction fn = (params.is_broadcast ? broadcast_fns[index] : non_broadcast_fns[index]);
- fn(params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+ fn(params, getExtendedTensorShape(lhs), getBuffer<T>(lhs), getExtendedTensorShape(rhs),
+ getBuffer<T>(rhs), getExtendedTensorShape(output), getBuffer<bool>(output));
}
template <typename T>
bool requires_broadcast = !HaveSameShapes(lhs, rhs);
using CompareFunction =
- void (*)(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape,
- const T *input2_data, const Shape &output_shape, bool *output_data);
+ void (*)(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape,
+ const T *input2_data, const Shape &output_shape, bool *output_data);
static const CompareFunction broadcast_fns[] = {
- Broadcast4DSlowEqual, Broadcast4DSlowNotEqual, Broadcast4DSlowGreater,
- Broadcast4DSlowGreaterEqual, Broadcast4DSlowLess, Broadcast4DSlowLessEqual,
+ Broadcast4DSlowEqual, Broadcast4DSlowNotEqual, Broadcast4DSlowGreater,
+ Broadcast4DSlowGreaterEqual, Broadcast4DSlowLess, Broadcast4DSlowLessEqual,
};
static const CompareFunction non_broadcast_fns[] = {
- EqualNoScaling, NotEqualNoScaling, GreaterNoScaling,
- GreaterEqualNoScaling, LessNoScaling, LessEqualNoScaling,
+ EqualNoScaling, NotEqualNoScaling, GreaterNoScaling,
+ GreaterEqualNoScaling, LessNoScaling, LessEqualNoScaling,
};
static_assert(sizeof(broadcast_fns) == sizeof(non_broadcast_fns),
CompareFunction fn = (requires_broadcast ? broadcast_fns[index] : non_broadcast_fns[index]);
- fn(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+ fn(getExtendedTensorShape(lhs), getBuffer<T>(lhs), getExtendedTensorShape(rhs), getBuffer<T>(rhs),
+ getExtendedTensorShape(output), getBuffer<bool>(output));
}
} // namespace
CompareLayer::CompareLayer()
- : _lhs(nullptr), _rhs(nullptr), _output(nullptr),
- _op_type(ir::operation::Comparison::ComparisonType::Equal)
+ : _lhs(nullptr), _rhs(nullptr), _output(nullptr),
+ _op_type(ir::operation::Comparison::ComparisonType::Equal)
{
// DO NOTHING
}
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims.push_back(getTensorShape(_inputs[i]));
+ inputDims.push_back(getShape(_inputs[i]));
inputDimsPtr.push_back(&inputDims[i]);
}
for (const auto input : _inputs)
{
- inputDataPtrs.emplace_back(reinterpret_cast<const T *>(input->buffer()));
+ inputDataPtrs.emplace_back(getBuffer<T>(input));
}
nnfw::cker::Concatenation<T>(op_params, inputDimsPtr.data(), inputDataPtrs.data(),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+ getShape(_output), getBuffer<T>(_output));
}
void ConcatLayer::concatenationQuant8()
{
std::vector<float> input_scales(num_inputs);
for (uint32_t i = 0; i < num_inputs; i++)
{
- input_zeropoints[i] = _inputs[i]->data_offset();
+ input_zeropoints[i] = _inputs[i]->data_zero_point();
input_scales[i] = _inputs[i]->data_scale();
}
op_params.inputs_count = num_inputs;
op_params.input_zeropoint = input_zeropoints.data();
op_params.input_scale = input_scales.data();
- op_params.output_zeropoint = _output->data_offset();
+ op_params.output_zeropoint = _output->data_zero_point();
op_params.output_scale = _output->data_scale();
std::vector<nnfw::cker::Shape *> inputDimsPtr;
inputDims.reserve(num_inputs);
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims.push_back(getTensorShape(_inputs[i]));
+ inputDims.push_back(getShape(_inputs[i]));
inputDimsPtr.push_back(&inputDims[i]);
}
std::vector<const uint8_t *> inputDataPtrs;
for (const auto input : _inputs)
{
- inputDataPtrs.emplace_back(reinterpret_cast<const uint8_t *>(input->buffer()));
+ inputDataPtrs.emplace_back(getBuffer<uint8_t>(input));
}
nnfw::cker::ConcatenationWithScaling(op_params, inputDimsPtr.data(), inputDataPtrs.data(),
- getTensorShape(_output),
- reinterpret_cast<uint8_t *>(_output->buffer()));
+ getShape(_output), getBuffer<uint8_t>(_output));
}
void ConcatLayer::configure(const std::vector<const IPortableTensor *> &inputs, int32_t axis,
*/
#include "ConvolutionLayer.h"
+#include "OperationUtils.h"
#include "../Tensor.h"
#include "ir/Padding.h"
namespace ops
{
ConvolutionLayer::ConvolutionLayer()
- : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
- _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
- _dilationHeightFactor(1), _activation(ir::Activation::NONE),
- _conv_kernel(new nnfw::cker::Conv()), _prepare(false)
+ : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+ _dilationHeightFactor(1), _activation(ir::Activation::NONE),
+ _conv_kernel(new nnfw::cker::Conv()), _prepare(false)
{
// DO NOTHING
}
op_params.float_activation_max = output_activation_max;
nnfw::cker::Conv &kernel = *_conv_kernel;
- kernel(op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ kernel(op_params, getShape(_input), getBuffer<float>(_input), getShape(_kernel),
+ getBuffer<float>(_kernel), getShape(_bias), getBuffer<float>(_bias), getShape(_output),
+ getBuffer<float>(_output));
}
void ConvolutionLayer::convQuant8()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
double real_multiplier = 0.0;
int32_t output_multiplier = 0;
op_params.padding_type = getPaddingType(_paddingType);
op_params.padding_values.width = _paddingLeft;
op_params.padding_values.height = _paddingTop;
- op_params.input_offset = -_input->data_offset();
- op_params.weights_offset = -_kernel->data_offset();
- op_params.output_offset = _output->data_offset();
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.weights_offset = -_kernel->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
op_params.output_multiplier = output_multiplier;
op_params.output_shift = output_shift;
op_params.quantized_activation_min = output_activation_min;
op_params.is_replaced_weights = true;
nnfw::cker::Conv &kernel = *_conv_kernel;
- kernel(op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_kernel), reinterpret_cast<const uint8_t *>(_kernel->buffer()),
- getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ kernel(op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
+ getBuffer<uint8_t>(_kernel), getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output),
+ getBuffer<uint8_t>(_output));
+}
+
+void ConvolutionLayer::convQuant8PerChannel()
+{
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
+
+ nnfw::cker::ConvParams op_params;
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
+ op_params.stride_height = _strideHeight;
+ op_params.stride_width = _strideWidth;
+ op_params.dilation_height_factor = _dilationHeightFactor;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.padding_values.height = _paddingTop;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
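+ // NOTE The per-channel output multipliers/shifts used here are precomputed in prepare()
+ // (GetQuantizedConvolutionMultipliersAndShifts) and stored inside the cker Conv kernel.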
+ nnfw::cker::Conv &kernel = *_conv_kernel;
+ kernel(op_params, getShape(_input), getBuffer<int8_t>(_input), getShape(_kernel),
+ getBuffer<int8_t>(_kernel), getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output),
+ getBuffer<int8_t>(_output));
}
void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
param_padding.param.bottom = _paddingBottom;
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
- _dilationWidthFactor, _dilationHeightFactor);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ _dilationWidthFactor, _dilationHeightFactor);
_paddingLeft = padding.left;
_paddingRight = padding.right;
{
convQuant8();
}
+ else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ convQuant8PerChannel();
+ }
else
{
throw std::runtime_error{"Conv: unsupported data type"};
if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant())
{
bool is_transposed = false;
- kernel.prepare(getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
- getPaddingType(_paddingType), is_transposed, _dilationWidthFactor,
- _dilationHeightFactor);
+ kernel.prepare(getShape(_kernel), getBuffer<float>(_kernel), getPaddingType(_paddingType),
+ is_transposed, _dilationWidthFactor, _dilationHeightFactor);
// Decrease reference of _kernel(weights) only when _kernel is constant
if (is_transposed)
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
!_input->is_dynamic() && !_output->is_dynamic())
{
- kernel.prepareQuant(getTensorShape(_input), getTensorShape(_kernel), getTensorShape(_output),
- _strideWidth, _strideHeight, _dilationWidthFactor, _dilationHeightFactor);
+ kernel.prepareQuant(getShape(_input), getShape(_kernel), getShape(_output), _strideWidth,
+ _strideHeight, _dilationWidthFactor, _dilationHeightFactor);
+ }
+ else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
+ {
+ GetQuantizedConvolutionMultipliersAndShifts(
+ _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
+ _kernel->data_scales().size(), getShape(_kernel).Dims(0),
+ kernel.per_channel_output_multiplier(), kernel.per_channel_output_shift());
+ }
+ else
+ {
+ throw std::runtime_error{"Conv2D: Int8 dynamic weight is not supported"};
+ }
}
_prepare = true;
}
void convQuant8();
+ void convQuant8PerChannel();
+
void configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, ir::PaddingType _paddingType,
const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
template <typename T> void DepthToSpaceLayer::depthToSpace()
{
- nnfw::cker::DepthToSpace(getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()),
- _block_size);
+ nnfw::cker::DepthToSpace(getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output), _block_size);
}
void DepthToSpaceLayer::configure(const IPortableTensor *input, const int32_t block_size,
op_params.float_activation_max = output_activation_max;
nnfw::cker::DepthwiseConv<float, float>(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
- _external_context->ruy_context());
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_kernel),
+ getBuffer<float>(_kernel), getShape(_bias), getBuffer<float>(_bias), getShape(_output),
+ getBuffer<float>(_output), _external_context->ruy_context());
}
void DepthwiseConvolutionLayer::convQuant8()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
double real_multiplier = 0.0;
int32_t output_multiplier = 0;
op_params.padding_values.width = _paddingLeft;
op_params.padding_values.height = _paddingTop;
op_params.depth_multiplier = _multiplier;
- op_params.input_offset = -_input->data_offset();
- op_params.weights_offset = -_kernel->data_offset();
- op_params.output_offset = _output->data_offset();
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.weights_offset = -_kernel->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
op_params.output_multiplier = output_multiplier;
op_params.output_shift = output_shift;
op_params.quantized_activation_min = output_activation_min;
op_params.quantized_activation_max = output_activation_max;
nnfw::cker::DepthwiseConv<uint8_t, int32_t>(
- op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_kernel), reinterpret_cast<const uint8_t *>(_kernel->buffer()),
- getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()),
- _external_context->ruy_context());
+ op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
+ getBuffer<uint8_t>(_kernel), getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output),
+ getBuffer<uint8_t>(_output), _external_context->ruy_context());
+}
+
+void DepthwiseConvolutionLayer::convQuant8PerChannel()
+{
+ if (!_prepared)
+ {
+ prepareQuant8PerChannel();
+ _prepared = true;
+ }
+
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
+
+ nnfw::cker::DepthwiseConvParams op_params;
+ op_params.padding_type = nnfw::cker::PaddingType::kSame;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.depth_multiplier = _multiplier;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidth;
+ op_params.dilation_height_factor = _dilationHeight;
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.weights_offset = 0;
+ op_params.output_offset = _output->data_zero_point();
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
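+ // The multiplier/shift vectors were filled by prepareQuant8PerChannel() before the first run.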
+ nnfw::cker::optimized_integer_ops::DepthwiseConvPerChannel(
+ op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
+ getShape(_input), getBuffer<int8_t>(_input), getShape(_kernel), getBuffer<int8_t>(_kernel),
+ getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output), getBuffer<int8_t>(_output),
+ _external_context->ruy_context());
+}
+
+void DepthwiseConvolutionLayer::prepareQuant8PerChannel()
+{
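+ // Depthwise weights are laid out as [1, kernel_h, kernel_w, output_channels]; Dims(3) is the channel count.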
+ GetQuantizedConvolutionMultipliersAndShifts(
+ _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
+ _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
+ _per_channel_output_shift);
}
void DepthwiseConvolutionLayer::configure(
- const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
- const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight,
- const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight,
- const ir::Activation activation, IPortableTensor *output,
- const std::shared_ptr<ExternalContext> &external_context)
+ const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
+ const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight,
+ const ir::Activation activation, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
{
_input = input;
_kernel = kernel;
_activation = activation;
_output = output;
_external_context = external_context;
+
+ if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
+ {
+ prepareQuant8PerChannel();
+ _prepared = true;
+ }
+ }
}
void DepthwiseConvolutionLayer::run()
{
convQuant8();
}
+ else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ convQuant8PerChannel();
+ }
else
{
throw std::runtime_error{"DepthwiseConv: unsupported data type"};
void convQuant8();
+ void convQuant8PerChannel();
+
void configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, const uint32_t paddingLeft,
const uint32_t paddingRight, const uint32_t paddingTop,
void run() override;
+private:
+ void prepareQuant8PerChannel();
+
private:
const IPortableTensor *_input{nullptr};
const IPortableTensor *_kernel{nullptr};
ir::Activation _activation{ir::Activation::NONE};
std::shared_ptr<ExternalContext> _external_context;
+
+ bool _prepared{false};
+
+ // Per channel output multiplier and shift.
+ std::vector<int32_t> _per_channel_output_multiplier;
+ std::vector<int> _per_channel_output_shift;
};
} // namespace ops
{
EinsumLayer::EinsumLayer()
- : _inputs(), _output(nullptr), _equation(), _einsum_kernel(new nnfw::cker::Einsum())
+ : _inputs(), _output(nullptr), _equation(), _einsum_kernel(new nnfw::cker::Einsum())
{
// DO NOTHING
}
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputShapes.emplace_back(getTensorShape(_inputs[i]));
- inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(_inputs[i]->buffer()));
+ inputShapes.emplace_back(getShape(_inputs[i]));
+ inputFloatPtrs.emplace_back(getBuffer<float>(_inputs[i]));
}
- kernel(_equation, inputShapes, inputFloatPtrs, getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ kernel(_equation, inputShapes, inputFloatPtrs, getShape(_output), getBuffer<float>(_output));
}
void EinsumLayer::run()
{
ElementwiseActivationLayer::ElementwiseActivationLayer()
- : _input(nullptr), _output(nullptr), _kernel()
+ : _input(nullptr), _output(nullptr), _kernel()
{
// DO NOTHING
}
void ElementwiseActivationLayer::PopulateLookupTable(const ElementwiseActivationType op_type)
{
const auto input_scale = static_cast<double>(_input->data_scale());
- const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
+ const auto input_zero_point = static_cast<int32_t>(_input->data_zero_point());
const auto output_scale = static_cast<double>(_output->data_scale());
- const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
+ const auto output_zero_point = static_cast<int32_t>(_output->data_zero_point());
const float inverse_scale = 1 / output_scale;
int32_t maxval = std::numeric_limits<uint8_t>::max();
int32_t minval = std::numeric_limits<uint8_t>::min();
void ElementwiseActivationLayer::EvalUsingLookupTable(const IPortableTensor *input,
IPortableTensor *output)
{
- const int size = MatchingFlatSize(getTensorShape(input), getTensorShape(output));
- const uint8_t *input_data = reinterpret_cast<const uint8_t *>(input->buffer());
- uint8_t *output_data = reinterpret_cast<uint8_t *>(output->buffer());
+ const int size = MatchingFlatSize(getShape(input), getShape(output));
+ const uint8_t *input_data = getBuffer<uint8_t>(input);
+ uint8_t *output_data = getBuffer<uint8_t>(output);
for (int i = 0; i < size; ++i)
{
if (input->data_type() == OperandType::FLOAT32)
{
_kernel = [](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::ELU(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::ELU(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
};
}
else
else if (_input->data_type() == OperandType::FLOAT32)
{
_kernel = [](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::Logistic(getTensorShape(input),
- reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Logistic(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
};
}
else
if (alpha == std::numeric_limits<float>::infinity() && beta == 0.f)
{
_kernel = [](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::ReLU(getTensorShape(input),
- reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::ReLU(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
};
}
else if (alpha == 6.f && beta == 0.f)
{
_kernel = [](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::ReLU6(getTensorShape(input),
- reinterpret_cast<const float *>(input->buffer()),
- reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::ReLU6(getShape(input), getBuffer<float>(input), getBuffer<float>(output));
};
}
else
{
throw std::runtime_error(
- "ElementwiseActivationLayer : This layer suppports only ReLU(0-inf) and ReLU6(0-6)");
+ "ElementwiseActivationLayer : This layer suppports only ReLU(0-inf) and ReLU6(0-6)");
}
}
else
else if (_input->data_type() == OperandType::FLOAT32)
{
_kernel = [](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::Tanh(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Tanh(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
};
}
else
if (_input->data_type() == OperandType::FLOAT32)
{
_kernel = [alpha](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::LeakyReLU(nnfw::cker::LeakyReluParams{alpha}, getTensorShape(input),
- reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output),
- reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::LeakyReLU(nnfw::cker::LeakyReluParams{alpha}, getShape(input),
+ getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
};
}
else
{
if (!HaveSameShapes(lhs, rhs))
{
- nnfw::cker::LogicalAndBroadcast<T>(
- getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
- reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::LogicalAndBroadcast<T>(getShape(lhs), getBuffer<T>(lhs), getShape(rhs),
+ getBuffer<T>(rhs), getShape(output), getBuffer<T>(output));
}
else
{
- nnfw::cker::LogicalAndElementwise<T>(
- getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::LogicalAndElementwise<T>(getShape(lhs), getBuffer<T>(lhs), getBuffer<T>(rhs),
+ getBuffer<T>(output));
}
}
{
if (!HaveSameShapes(lhs, rhs))
{
- nnfw::cker::LogicalOrBroadcast<T>(
- getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
- reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::LogicalOrBroadcast<T>(getShape(lhs), getBuffer<T>(lhs), getShape(rhs),
+ getBuffer<T>(rhs), getShape(output), getBuffer<T>(output));
}
else
{
- nnfw::cker::LogicalOrElementwise<T>(
- getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::LogicalOrElementwise<T>(getShape(lhs), getBuffer<T>(lhs), getBuffer<T>(rhs),
+ getBuffer<T>(output));
}
}
template <typename T>
void maximumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
{
- nnfw::cker::Max<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::Max<T>(getShape(lhs), getBuffer<T>(lhs), getShape(rhs), getBuffer<T>(rhs),
+ getShape(output), getBuffer<T>(output));
}
template <typename T>
void minimumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
{
- nnfw::cker::Min<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::Min<T>(getShape(lhs), getBuffer<T>(lhs), getShape(rhs), getBuffer<T>(rhs),
+ getShape(output), getBuffer<T>(output));
}
bool haveSameQauntInfo(const IPortableTensor *lhs, const IPortableTensor *rhs,
const IPortableTensor *output)
{
return (lhs->data_scale() == rhs->data_scale() && lhs->data_scale() == output->data_scale()) &&
- (lhs->data_offset() == rhs->data_offset() && lhs->data_offset() == output->data_offset());
+ (lhs->data_zero_point() == rhs->data_zero_point() &&
+ lhs->data_zero_point() == output->data_zero_point());
}
} // namespace
#include <cker/operation/Erf.h>
#include <cker/operation/Exp.h>
#include <cker/operation/LogicalNot.h>
-#include <cker/operation/Quantize.h>
#include <cker/operation/Round.h>
namespace onert
{
void absFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Abs(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Abs(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
template <typename FromT>
const auto in = *reinterpret_cast<const DataPtr *>(&input_buf);
auto out = *reinterpret_cast<DataPtr *>(&output_buf);
- auto input_shape = getTensorShape(input);
- auto output_shape = getTensorShape(output);
+ auto input_shape = getShape(input);
+ auto output_shape = getShape(output);
const auto num_elements = MatchingFlatSize(input_shape, output_shape);
switch (input->data_type())
void cosFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Cos(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Cos(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void dequantizeInt8(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Dequantize(getTensorShape(input), reinterpret_cast<const int8_t *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()),
- input->data_scale(), input->data_offset());
+ nnfw::cker::Dequantize(getShape(input), getBuffer<int8_t>(input), getShape(output),
+ getBuffer<float>(output), input->data_scale(), input->data_zero_point());
}
void dequantizeUint8(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Dequantize(getTensorShape(input), reinterpret_cast<const uint8_t *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()),
- input->data_scale(), input->data_offset());
+ nnfw::cker::Dequantize(getShape(input), getBuffer<uint8_t>(input), getShape(output),
+ getBuffer<float>(output), input->data_scale(), input->data_zero_point());
}
void expFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Exp(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Exp(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void erfFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Erf(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Erf(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void floorFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Floor(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Floor(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void logFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Log(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Log(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void logicalNot(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::LogicalNot(getTensorShape(input), reinterpret_cast<const bool *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+ nnfw::cker::LogicalNot(getShape(input), getBuffer<bool>(input), getShape(output),
+ getBuffer<bool>(output));
}
template <typename T> void neg(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Neg<T>(getTensorShape(input), reinterpret_cast<const T *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
-}
-
-template <typename InputT, typename OutputT>
-void affineQuantize(const IPortableTensor *input, IPortableTensor *output)
-{
- nnfw::cker::Quantize(getTensorShape(input), reinterpret_cast<const InputT *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<OutputT *>(output->buffer()),
- output->data_scale(), output->data_offset());
+ nnfw::cker::Neg<T>(getShape(input), getBuffer<T>(input), getShape(output), getBuffer<T>(output));
}
void roundFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Round(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Round(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void rsqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Rsqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Rsqrt(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void sinFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Sin(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Sin(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void sqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Sqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Sqrt(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void squareFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Square(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Square(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output)
if (!HaveSameShapes(input, output))
throw std::runtime_error{"ZerosLike: input and output shape don't match."};
- auto element_size = getTensorShape(input).FlatSize();
+ auto element_size = getShape(input).FlatSize();
- memset(reinterpret_cast<T *>(output->buffer()), 0, element_size * sizeof(T));
+ memset(getBuffer<T>(output), 0, element_size * sizeof(T));
}
} // namespace
throw std::runtime_error{"Neg: Unsupported data type"};
}
break;
- case ElementwiseUnaryType::kQuantize:
- if ((input->data_type() == OperandType::FLOAT32))
- {
- _kernel = affineQuantize<float, uint8_t>;
- }
- else
- {
- throw std::runtime_error{"Quantize: Unsupported data type"};
- }
- break;
case ElementwiseUnaryType::kRound:
if ((input->data_type() == OperandType::FLOAT32))
{
}
break;
default:
- throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"};
+ throw std::runtime_error{"ElementwiseUnary: Unsupported ElementwiseUnary type"};
}
}
switch (_output->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::Fill<float *>(reinterpret_cast<float *>(_value->buffer()),
- getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Fill<float>(getBuffer<float>(_value), getShape(_output),
+ getBuffer<float>(_output));
break;
case OperandType::INT32:
- nnfw::cker::Fill<int32_t *>(reinterpret_cast<int32_t *>(_value->buffer()),
- getTensorShape(_output),
- reinterpret_cast<int32_t *>(_output->buffer()));
+ nnfw::cker::Fill<int32_t>(getBuffer<int32_t>(_value), getShape(_output),
+ getBuffer<int32_t>(_output));
break;
case OperandType::INT64:
- nnfw::cker::Fill<int64_t *>(reinterpret_cast<int64_t *>(_value->buffer()),
- getTensorShape(_output),
- reinterpret_cast<int64_t *>(_output->buffer()));
+ nnfw::cker::Fill<int64_t>(getBuffer<int64_t>(_value), getShape(_output),
+ getBuffer<int64_t>(_output));
break;
case OperandType::UINT32:
- nnfw::cker::Fill<uint32_t *>(reinterpret_cast<uint32_t *>(_value->buffer()),
- getTensorShape(_output),
- reinterpret_cast<uint32_t *>(_output->buffer()));
+ nnfw::cker::Fill<uint32_t>(getBuffer<uint32_t>(_value), getShape(_output),
+ getBuffer<uint32_t>(_output));
break;
default:
throw std::runtime_error{"Fill: unsupported data type"};
{
FullyConnectedLayer::FullyConnectedLayer()
- : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
- _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
- _external_context(nullptr), _is_hybrid(false), _is_shuffled16x1float32(false)
+ : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
+ _external_context(nullptr), _is_hybrid(false), _is_shuffled16x1float32(false)
{
// DO NOTHING
}
nnfw::cker::FullyConnectedParams op_params;
op_params.activation = convertActivationType(_activation);
- nnfw::cker::FullyConnected(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::FullyConnected(op_params, getShape(_input), getBuffer<float>(_input),
+ getShape(_weights), getBuffer<float>(_weights), getShape(_bias),
+ _bias ? getBuffer<float>(_bias) : nullptr, getShape(_output),
+ getBuffer<float>(_output));
}
// executionMutex is used to protect concurrent access of non-threadsafe resources
int32_t output_activation_max = 0;
GetQuantizedConvolutionMultiplier(_input, _weights, _bias, _output, &real_multiplier);
QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
nnfw::cker::FullyConnectedParams op_params;
- op_params.input_offset = -_input->data_offset();
- op_params.weights_offset = -_weights->data_offset();
- op_params.output_offset = _output->data_offset();
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.weights_offset = -_weights->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
op_params.output_multiplier = output_multiplier;
op_params.output_shift = output_shift;
op_params.quantized_activation_min = output_activation_min;
op_params.quantized_activation_max = output_activation_max;
- nnfw::cker::FullyConnected(
- op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const uint8_t *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ nnfw::cker::FullyConnected(op_params, getShape(_input), getBuffer<uint8_t>(_input),
+ getShape(_weights), getBuffer<uint8_t>(_weights), getShape(_bias),
+ _bias ? getBuffer<int32_t>(_bias) : nullptr, getShape(_output),
+ getBuffer<uint8_t>(_output));
}
void FullyConnectedLayer::fullyConnectedHybrid()
nnfw::cker::FCTempArena &temp_arena = *_temp_arena;
if (!temp_arena.prepared)
{
- temp_arena.prepare(getTensorShape(_input), getTensorShape(_weights));
+ temp_arena.prepare(getShape(_input), getShape(_weights));
}
nnfw::cker::FullyConnectedParams op_params;
#ifndef USE_RUY_GEMV
nnfw::cker::FullyConnectedHybrid(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const int8_t *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
- _external_context->ruy_context());
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ getBuffer<int8_t>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output), temp_arena, _external_context->ruy_context());
#else
nnfw::cker::FullyConnectedHybrid(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights),
- (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
- : reinterpret_cast<const int8_t *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
- _external_context->ruy_context());
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
+ : getBuffer<int8_t>(_weights),
+ getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr, getShape(_output),
+ getBuffer<float>(_output), temp_arena, _external_context->ruy_context());
if (_cached_weights == nullptr || _is_weights_freed)
return;
// If the input's elements are all zero, the ruy kernel path is bypassed (never entered),
// so handle that case here.
- const int input_size = getTensorShape(_input).FlatSize();
- if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_input->buffer()), input_size))
+ const int input_size = getShape(_input).FlatSize();
+ if (nnfw::cker::IsZeroVector(getBuffer<float>(_input), input_size))
return;
auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);
tensor->decrease_ref();
if (tensor->buffer() == nullptr) // ref == 0?
{
+#if defined(__ANDROID__) && (__ANDROID_API__ >= 26)
+ // NOTE This call forces the OS to release any unused memory immediately
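+ // (M_PURGE is an Android bionic mallopt() option; this assumes <malloc.h> is included by this file)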
+ mallopt(M_PURGE, 0);
+#endif
_is_weights_freed = true;
}
#endif
if (block_size.size() == 0)
{
nnfw::cker::FullyConnectedSparseWeightRandom(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w1_segments,
- w1_indices);
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ getBuffer<float>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output), w1_segments, w1_indices);
}
else if (block_size.size() == 2 && block_size[0] == 16 && block_size[1] == 1)
{
nnfw::cker::FullyConnectedSparseWeight16x1(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w1_segments,
- w1_indices);
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ getBuffer<float>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output), w1_segments, w1_indices);
}
else
throw std::runtime_error{"FullyConnected: unsupported sparsity"};
nnfw::cker::FullyConnectedParams op_params;
op_params.activation = convertActivationType(_activation);
- nnfw::cker::FullyConnected16x1Float32(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::FullyConnected16x1Float32(op_params, getShape(_input), getBuffer<float>(_input),
+ getShape(_weights), getBuffer<float>(_weights),
+ getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output));
#else
throw std::runtime_error{"FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
#endif
if (_is_shuffled16x1float32)
{
throw std::runtime_error{
- "FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
+ "FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
}
#endif
_external_context = external_context;
{
if (_bias && _bias->is_constant())
{
- const int bias_size = getTensorShape(_bias).FlatSize();
- if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
+ const int bias_size = getShape(_bias).FlatSize();
+ if (nnfw::cker::IsZeroVector(getBuffer<float>(_bias), bias_size))
{
_bias = nullptr;
}
if (_input->is_dynamic() || !_weights->is_constant())
return;
- const int rows = getTensorShape(_weights).Dims(0);
+ const int rows = getShape(_weights).Dims(0);
if (rows % 4 == 0)
{
// TODO If it's possible to extract precaching from ruy kernel,
{
FusedBatchNormLayer::FusedBatchNormLayer()
- : _inputs(), _output(nullptr), _epsilon(0), _is_training(true),
- _fusedbatchnorm_kernel(new nnfw::cker::FusedBatchNorm())
+ : _inputs(), _output(nullptr), _epsilon(0), _is_training(true),
+ _fusedbatchnorm_kernel(new nnfw::cker::FusedBatchNorm())
{
// DO NOTHING
}
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputShapes.emplace_back(getTensorShape(_inputs[i]));
- inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(_inputs[i]->buffer()));
+ inputShapes.emplace_back(getShape(_inputs[i]));
+ inputFloatPtrs.emplace_back(getBuffer<float>(_inputs[i]));
}
nnfw::cker::FusedBatchNormParams param;
param.is_training = _is_training;
param.data_format = _data_format;
- kernel(inputShapes, inputFloatPtrs, getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()), param);
+ kernel(inputShapes, inputFloatPtrs, getShape(_output), getBuffer<float>(_output), param);
}
void FusedBatchNormLayer::run()
using IndicesType = int32_t;
nnfw::cker::Gather<InputType, IndicesType>(
- op_params, getTensorShape(_input), reinterpret_cast<const InputType *>(_input->buffer()),
- getTensorShape(_indices), reinterpret_cast<const IndicesType *>(_indices->buffer()),
- getTensorShape(_output), reinterpret_cast<OutputType *>(_output->buffer()));
+ op_params, getShape(_input), getBuffer<InputType>(_input), getShape(_indices),
+ getBuffer<IndicesType>(_indices), getShape(_output), getBuffer<OutputType>(_output));
break;
}
case OperandType::INT64:
using IndicesType = int64_t;
nnfw::cker::Gather<InputType, IndicesType>(
- op_params, getTensorShape(_input), reinterpret_cast<const InputType *>(_input->buffer()),
- getTensorShape(_indices), reinterpret_cast<const IndicesType *>(_indices->buffer()),
- getTensorShape(_output), reinterpret_cast<OutputType *>(_output->buffer()));
+ op_params, getShape(_input), getBuffer<InputType>(_input), getShape(_indices),
+ getBuffer<IndicesType>(_indices), getShape(_output), getBuffer<OutputType>(_output));
break;
}
default:
switch (_input->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::L2NormalizeFloat32(
- getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::L2NormalizeFloat32(getShape(_input), getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output));
break;
case OperandType::QUANT_UINT8_ASYMM:
{
nnfw::cker::L2NormParams params;
- assert(_input->data_offset() == 128);
- params.input_zero_point = _input->data_offset();
- nnfw::cker::L2NormalizeQuant8(
- params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ assert(_input->data_zero_point() == 128);
+ params.input_zero_point = _input->data_zero_point();
+ nnfw::cker::L2NormalizeQuant8(params, getShape(_input), getBuffer<uint8_t>(_input),
+ getShape(_output), getBuffer<uint8_t>(_output));
}
break;
else
{
assert(tensor->total_size() == total_size);
- return reinterpret_cast<T *>(tensor->buffer());
+ return getBuffer<T>(tensor);
}
}
else
memset(buffer, 0, tensor_in->total_size());
}
-}
+} // namespace
void LSTMLayer::LSTMFloat()
{
- assert(_input->num_dimensions() >= 2 && _input->num_dimensions() <= 3);
+ auto in_shape = _input->getShape();
+ assert(in_shape.rank() >= 2 && in_shape.rank() <= 3);
int max_time, n_batch;
- if (_input->num_dimensions() == 3)
+ if (in_shape.rank() == 3)
{
- max_time = (_time_major) ? _input->dimension(0) : _input->dimension(1);
- n_batch = (_time_major) ? _input->dimension(1) : _input->dimension(0);
+ max_time = (_time_major) ? in_shape.dim(0) : in_shape.dim(1);
+ n_batch = (_time_major) ? in_shape.dim(1) : in_shape.dim(0);
}
else
{
max_time = 1;
- n_batch = _input->dimension(0);
+ n_batch = in_shape.dim(0);
}
- const int n_input = _input->dimension(_input->num_dimensions() - 1);
+ const int n_input = in_shape.dim(in_shape.rank() - 1);
const int aux_input_size = 0;
// n_cell and n_output will be the same size when there is no projection.
- const int n_cell = _input_to_output_weights->dimension(0);
- const int n_output = _recurrent_to_output_weights->dimension(1);
+ const int n_cell = _input_to_output_weights->getShape().dim(0);
+ const int n_output = _recurrent_to_output_weights->getShape().dim(1);
// Since we have already checked that the weights are either all present or all absent, we can
// check the existence of only one to get the condition.
float *output_state_buf = getOptionalOutputBuffer<float>(_output_state, &_output_state_vec,
_output_state_in->total_size());
float *cell_state_buf =
- getOptionalOutputBuffer<float>(_cell_state, &_cell_state_vec, _cell_state_in->total_size());
+ getOptionalOutputBuffer<float>(_cell_state, &_cell_state_vec, _cell_state_in->total_size());
initializeStateBuffer(_output_state_in, output_state_buf, _has_output_state_data);
initializeStateBuffer(_cell_state_in, cell_state_buf, _has_cell_state_data);
// Index the scratch buffers pointers to the global scratch buffer.
float *scratch_buffer_buf = getOptionalOutputBuffer<float>(
- _scratch_buffer, &_scratch_vec, n_batch * n_cell * (use_cifg ? 3 : 4) * sizeof(float));
+ _scratch_buffer, &_scratch_vec, n_batch * n_cell * (use_cifg ? 3 : 4) * sizeof(float));
float *input_gate_scratch = nullptr;
float *cell_gate_scratch = nullptr;
float *forget_gate_scratch = nullptr;
auto optional_tensor_ptr = [](const IPortableTensor *tensor) {
// If the tensor is not given or its size is 0, treat it as not given
- return (tensor && tensor->total_size() > 0) ? reinterpret_cast<float *>(tensor->buffer())
- : nullptr;
+ return (tensor && tensor->total_size() > 0) ? getBuffer<float>(tensor) : nullptr;
};
// Optional inputs
- float *input_to_input_weights_ptr = optional_tensor_ptr(_input_to_input_weights);
- float *recurrent_to_input_weights_ptr = optional_tensor_ptr(_recurrent_to_input_weights);
- float *cell_to_input_weights_ptr = optional_tensor_ptr(_cell_to_input_weights);
- float *cell_to_forget_weights_ptr = optional_tensor_ptr(_cell_to_forget_weights);
- float *cell_to_output_weights_ptr = optional_tensor_ptr(_cell_to_output_weights);
- float *input_gate_bias_ptr = optional_tensor_ptr(_input_gate_bias);
- float *projection_weights_ptr = optional_tensor_ptr(_projection_weights);
- float *projection_bias_ptr = optional_tensor_ptr(_projection_bias);
- float *input_layer_norm_coefficients_ptr = optional_tensor_ptr(_input_layer_norm_coefficients);
- float *forget_layer_norm_coefficients_ptr = optional_tensor_ptr(_forget_layer_norm_coefficients);
- float *cell_layer_norm_coefficients_ptr = optional_tensor_ptr(_cell_layer_norm_coefficients);
- float *output_layer_norm_coefficients_ptr = optional_tensor_ptr(_output_layer_norm_coefficients);
+ const float *input_to_input_weights_ptr = optional_tensor_ptr(_input_to_input_weights);
+ const float *recurrent_to_input_weights_ptr = optional_tensor_ptr(_recurrent_to_input_weights);
+ const float *cell_to_input_weights_ptr = optional_tensor_ptr(_cell_to_input_weights);
+ const float *cell_to_forget_weights_ptr = optional_tensor_ptr(_cell_to_forget_weights);
+ const float *cell_to_output_weights_ptr = optional_tensor_ptr(_cell_to_output_weights);
+ const float *input_gate_bias_ptr = optional_tensor_ptr(_input_gate_bias);
+ const float *projection_weights_ptr = optional_tensor_ptr(_projection_weights);
+ const float *projection_bias_ptr = optional_tensor_ptr(_projection_bias);
+ const float *input_layer_norm_coefficients_ptr =
+ optional_tensor_ptr(_input_layer_norm_coefficients);
+ const float *forget_layer_norm_coefficients_ptr =
+ optional_tensor_ptr(_forget_layer_norm_coefficients);
+ const float *cell_layer_norm_coefficients_ptr =
+ optional_tensor_ptr(_cell_layer_norm_coefficients);
+ const float *output_layer_norm_coefficients_ptr =
+ optional_tensor_ptr(_output_layer_norm_coefficients);
// Copy out the LSTM-specific params so they can be passed to the function.
nnfw::cker::LSTMParams lstm_params;
lstm_params.cell_clip = _params.cell_threshold;
lstm_params.proj_clip = _params.projection_threshold;
- const int output_batch_leading_dim = _output->dimension(_output->num_dimensions() - 1);
+ auto out_shape = _output->getShape();
+ const int output_batch_leading_dim = out_shape.dim(out_shape.rank() - 1);
if (_time_major)
{
// Loop through the sequence.
// If this is the forward_sequence, step forward, otherwise step
// backwards.
const int t_rel = _forward_sequence ? t : max_time - t - 1;
- const float *input_ptr = reinterpret_cast<float *>(_input->buffer()) + t_rel * input_step;
+ const float *input_ptr = getBuffer<float>(_input) + t_rel * input_step;
const float *aux_input_ptr = nullptr;
if (_aux_input)
{
- aux_input_ptr = reinterpret_cast<float *>(_aux_input->buffer()) + t_rel * input_step;
+ aux_input_ptr = getBuffer<float>(_aux_input) + t_rel * input_step;
}
- float *output_ptr =
- reinterpret_cast<float *>(_output->buffer()) + t_rel * output_step + _output_offset;
+ float *output_ptr = getBuffer<float>(_output) + t_rel * output_step + _output_offset;
LstmStepFloat(
- input_ptr, input_to_input_weights_ptr,
- reinterpret_cast<float *>(_input_to_forget_weights->buffer()),
- reinterpret_cast<float *>(_input_to_cell_weights->buffer()),
- reinterpret_cast<float *>(_input_to_output_weights->buffer()), aux_input_ptr,
- /*aux_input_to_input_weights=*/nullptr,
- /*aux_input_to_forget_weights=*/nullptr,
- /*aux_input_to_cell_weights=*/nullptr,
- /*aux_input_to_output_weights=*/nullptr, recurrent_to_input_weights_ptr,
- reinterpret_cast<float *>(_recurrent_to_forget_weights->buffer()),
- reinterpret_cast<float *>(_recurrent_to_cell_weights->buffer()),
- reinterpret_cast<float *>(_recurrent_to_output_weights->buffer()),
- cell_to_input_weights_ptr, cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
- input_layer_norm_coefficients_ptr, forget_layer_norm_coefficients_ptr,
- cell_layer_norm_coefficients_ptr, output_layer_norm_coefficients_ptr, input_gate_bias_ptr,
- reinterpret_cast<float *>(_forget_gate_bias->buffer()),
- reinterpret_cast<float *>(_cell_gate_bias->buffer()),
- reinterpret_cast<float *>(_output_gate_bias->buffer()), projection_weights_ptr,
- projection_bias_ptr, &lstm_params, n_batch, n_cell, n_input, aux_input_size, n_output,
- output_batch_leading_dim, output_state_buf, cell_state_buf, input_gate_scratch,
- forget_gate_scratch, cell_gate_scratch, output_gate_scratch, output_ptr);
+ input_ptr, input_to_input_weights_ptr, getBuffer<float>(_input_to_forget_weights),
+ getBuffer<float>(_input_to_cell_weights), getBuffer<float>(_input_to_output_weights),
+ aux_input_ptr,
+ /*aux_input_to_input_weights=*/nullptr,
+ /*aux_input_to_forget_weights=*/nullptr,
+ /*aux_input_to_cell_weights=*/nullptr,
+ /*aux_input_to_output_weights=*/nullptr, recurrent_to_input_weights_ptr,
+ getBuffer<float>(_recurrent_to_forget_weights),
+ getBuffer<float>(_recurrent_to_cell_weights),
+ getBuffer<float>(_recurrent_to_output_weights), cell_to_input_weights_ptr,
+ cell_to_forget_weights_ptr, cell_to_output_weights_ptr, input_layer_norm_coefficients_ptr,
+ forget_layer_norm_coefficients_ptr, cell_layer_norm_coefficients_ptr,
+ output_layer_norm_coefficients_ptr, input_gate_bias_ptr,
+ getBuffer<float>(_forget_gate_bias), getBuffer<float>(_cell_gate_bias),
+ getBuffer<float>(_output_gate_bias), projection_weights_ptr, projection_bias_ptr,
+ &lstm_params, n_batch, n_cell, n_input, aux_input_size, n_output, output_batch_leading_dim,
+ output_state_buf, cell_state_buf, input_gate_scratch, forget_gate_scratch,
+ cell_gate_scratch, output_gate_scratch, output_ptr);
}
}
else
// backwards.
const int t_rel = _forward_sequence ? t : max_time - t - 1;
const int time_offset = b * max_time + t_rel;
- const float *input_ptr =
- reinterpret_cast<float *>(_input->buffer()) + time_offset * input_step;
+ const float *input_ptr = getBuffer<float>(_input) + time_offset * input_step;
const float *aux_input_ptr = nullptr;
if (_aux_input)
{
- aux_input_ptr =
- reinterpret_cast<float *>(_aux_input->buffer()) + time_offset * input_step;
+ aux_input_ptr = getBuffer<float>(_aux_input) + time_offset * input_step;
}
- float *output_ptr = reinterpret_cast<float *>(_output->buffer()) +
- time_offset * output_step + _output_offset;
+ float *output_ptr = getBuffer<float>(_output) + time_offset * output_step + _output_offset;
// Offset the {output,cell}_state pointers to the right batch.
float *output_state_ptr = output_state_buf + b * output_batch_leading_dim;
float *cell_state_ptr = cell_state_buf + b * n_cell;
// Offset the scratch pointers to the right batch.
float *input_gate_scratch_ptr =
- input_gate_scratch ? input_gate_scratch + b * n_cell : nullptr;
+ input_gate_scratch ? input_gate_scratch + b * n_cell : nullptr;
float *forget_gate_scratch_ptr = forget_gate_scratch + b * n_cell;
float *cell_gate_scratch_ptr = cell_gate_scratch + b * n_cell;
float *output_gate_scratch_ptr = output_gate_scratch + b * n_cell;
LstmStepFloat(
- input_ptr, input_to_input_weights_ptr,
- reinterpret_cast<float *>(_input_to_forget_weights->buffer()),
- reinterpret_cast<float *>(_input_to_cell_weights->buffer()),
- reinterpret_cast<float *>(_input_to_output_weights->buffer()), aux_input_ptr,
- /*aux_input_to_input_weights=*/nullptr,
- /*aux_input_to_forget_weights=*/nullptr,
- /*aux_input_to_cell_weights=*/nullptr,
- /*aux_input_to_output_weights=*/nullptr, recurrent_to_input_weights_ptr,
- reinterpret_cast<float *>(_recurrent_to_forget_weights->buffer()),
- reinterpret_cast<float *>(_recurrent_to_cell_weights->buffer()),
- reinterpret_cast<float *>(_recurrent_to_output_weights->buffer()),
- cell_to_input_weights_ptr, cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
- input_layer_norm_coefficients_ptr, forget_layer_norm_coefficients_ptr,
- cell_layer_norm_coefficients_ptr, output_layer_norm_coefficients_ptr,
- input_gate_bias_ptr, reinterpret_cast<float *>(_forget_gate_bias->buffer()),
- reinterpret_cast<float *>(_cell_gate_bias->buffer()),
- reinterpret_cast<float *>(_output_gate_bias->buffer()), projection_weights_ptr,
- projection_bias_ptr, &lstm_params, /*n_batch=*/1, n_cell, n_input, aux_input_size,
- n_output, output_batch_leading_dim, output_state_ptr, cell_state_ptr,
- input_gate_scratch_ptr, forget_gate_scratch_ptr, cell_gate_scratch_ptr,
- output_gate_scratch_ptr, output_ptr);
+ input_ptr, input_to_input_weights_ptr, getBuffer<float>(_input_to_forget_weights),
+ getBuffer<float>(_input_to_cell_weights), getBuffer<float>(_input_to_output_weights),
+ aux_input_ptr,
+ /*aux_input_to_input_weights=*/nullptr,
+ /*aux_input_to_forget_weights=*/nullptr,
+ /*aux_input_to_cell_weights=*/nullptr,
+ /*aux_input_to_output_weights=*/nullptr, recurrent_to_input_weights_ptr,
+ getBuffer<float>(_recurrent_to_forget_weights),
+ getBuffer<float>(_recurrent_to_cell_weights),
+ getBuffer<float>(_recurrent_to_output_weights), cell_to_input_weights_ptr,
+ cell_to_forget_weights_ptr, cell_to_output_weights_ptr, input_layer_norm_coefficients_ptr,
+ forget_layer_norm_coefficients_ptr, cell_layer_norm_coefficients_ptr,
+ output_layer_norm_coefficients_ptr, input_gate_bias_ptr,
+ getBuffer<float>(_forget_gate_bias), getBuffer<float>(_cell_gate_bias),
+ getBuffer<float>(_output_gate_bias), projection_weights_ptr, projection_bias_ptr,
+ &lstm_params, /*n_batch=*/1, n_cell, n_input, aux_input_size, n_output,
+ output_batch_leading_dim, output_state_ptr, cell_state_ptr, input_gate_scratch_ptr,
+ forget_gate_scratch_ptr, cell_gate_scratch_ptr, output_gate_scratch_ptr, output_ptr);
}
}
}
}
void LSTMLayer::configure(
- const IPortableTensor *input, const IPortableTensor *input_to_input_weights,
- const IPortableTensor *input_to_forget_weights, const IPortableTensor *input_to_cell_weights,
- const IPortableTensor *input_to_output_weights,
- const IPortableTensor *recurrent_to_input_weights,
- const IPortableTensor *recurrent_to_forget_weights,
- const IPortableTensor *recurrent_to_cell_weights,
- const IPortableTensor *recurrent_to_output_weights,
- const IPortableTensor *cell_to_input_weights, const IPortableTensor *cell_to_forget_weights,
- const IPortableTensor *cell_to_output_weights, const IPortableTensor *input_layer_norm_weights,
- const IPortableTensor *forget_layer_norm_weights,
- const IPortableTensor *cell_layer_norm_weights,
- const IPortableTensor *output_layer_norm_weights, const IPortableTensor *aux_input,
- const IPortableTensor *aux_input_to_input_weights,
- const IPortableTensor *aux_input_to_forget_weights,
- const IPortableTensor *aux_input_to_cell_weights,
- const IPortableTensor *aux_input_to_output_weights, const IPortableTensor *input_gate_bias,
- const IPortableTensor *forget_gate_bias, const IPortableTensor *cell_gate_bias,
- const IPortableTensor *output_gate_bias, const IPortableTensor *projection_weights,
- const IPortableTensor *projection_bias, const IPortableTensor *output_state_in,
- const IPortableTensor *cell_state_in, const ir::operation::LSTM::Param ¶ms,
- bool forward_sequence, bool time_major, int output_offset, IPortableTensor *scratch_buffer,
- IPortableTensor *output_state, IPortableTensor *cell_state, IPortableTensor *output,
- bool has_output_state_data, bool has_cell_state_data)
+ const IPortableTensor *input, const IPortableTensor *input_to_input_weights,
+ const IPortableTensor *input_to_forget_weights, const IPortableTensor *input_to_cell_weights,
+ const IPortableTensor *input_to_output_weights, const IPortableTensor *recurrent_to_input_weights,
+ const IPortableTensor *recurrent_to_forget_weights,
+ const IPortableTensor *recurrent_to_cell_weights,
+ const IPortableTensor *recurrent_to_output_weights, const IPortableTensor *cell_to_input_weights,
+ const IPortableTensor *cell_to_forget_weights, const IPortableTensor *cell_to_output_weights,
+ const IPortableTensor *input_layer_norm_weights, const IPortableTensor *forget_layer_norm_weights,
+ const IPortableTensor *cell_layer_norm_weights, const IPortableTensor *output_layer_norm_weights,
+ const IPortableTensor *aux_input, const IPortableTensor *aux_input_to_input_weights,
+ const IPortableTensor *aux_input_to_forget_weights,
+ const IPortableTensor *aux_input_to_cell_weights,
+ const IPortableTensor *aux_input_to_output_weights, const IPortableTensor *input_gate_bias,
+ const IPortableTensor *forget_gate_bias, const IPortableTensor *cell_gate_bias,
+ const IPortableTensor *output_gate_bias, const IPortableTensor *projection_weights,
+ const IPortableTensor *projection_bias, const IPortableTensor *output_state_in,
+ const IPortableTensor *cell_state_in, const ir::operation::LSTM::Param ¶ms,
+ bool forward_sequence, bool time_major, int output_offset, IPortableTensor *scratch_buffer,
+ IPortableTensor *output_state, IPortableTensor *cell_state, IPortableTensor *output,
+ bool has_output_state_data, bool has_cell_state_data)
{
_input = input;
_input_to_input_weights = input_to_input_weights;
public:
void LSTMFloat();
- void configure(const IPortableTensor *input, const IPortableTensor *input_to_input_weights,
- const IPortableTensor *input_to_forget_weights,
- const IPortableTensor *input_to_cell_weights,
- const IPortableTensor *input_to_output_weights,
- const IPortableTensor *recurrent_to_input_weights,
- const IPortableTensor *recurrent_to_forget_weights,
- const IPortableTensor *recurrent_to_cell_weights,
- const IPortableTensor *recurrent_to_output_weights,
- const IPortableTensor *cell_to_input_weights,
- const IPortableTensor *cell_to_forget_weights,
- const IPortableTensor *cell_to_output_weights,
- const IPortableTensor *input_layer_norm_weights,
- const IPortableTensor *forget_layer_norm_weights,
- const IPortableTensor *cell_layer_norm_weights,
- const IPortableTensor *output_layer_norm_weights, const IPortableTensor *aux_input,
- const IPortableTensor *aux_input_to_input_weights,
- const IPortableTensor *aux_input_to_forget_weights,
- const IPortableTensor *aux_input_to_cell_weights,
- const IPortableTensor *aux_input_to_output_weights,
- const IPortableTensor *input_gate_bias, const IPortableTensor *forget_gate_bias,
- const IPortableTensor *cell_gate_bias, const IPortableTensor *output_gate_bias,
- const IPortableTensor *projection_weights, const IPortableTensor *projection_bias,
- const IPortableTensor *output_state_in, const IPortableTensor *cell_state_in,
- const ir::operation::LSTM::Param ¶ms, bool forward_sequence, bool time_major,
- int32_t output_offset, IPortableTensor *scratch_buffer,
- IPortableTensor *output_state, IPortableTensor *cell_state,
- IPortableTensor *output, bool has_output_state_data, bool has_cell_state_data);
+ void configure(
+ const IPortableTensor *input, const IPortableTensor *input_to_input_weights,
+ const IPortableTensor *input_to_forget_weights, const IPortableTensor *input_to_cell_weights,
+ const IPortableTensor *input_to_output_weights,
+ const IPortableTensor *recurrent_to_input_weights,
+ const IPortableTensor *recurrent_to_forget_weights,
+ const IPortableTensor *recurrent_to_cell_weights,
+ const IPortableTensor *recurrent_to_output_weights,
+ const IPortableTensor *cell_to_input_weights, const IPortableTensor *cell_to_forget_weights,
+ const IPortableTensor *cell_to_output_weights, const IPortableTensor *input_layer_norm_weights,
+ const IPortableTensor *forget_layer_norm_weights,
+ const IPortableTensor *cell_layer_norm_weights,
+ const IPortableTensor *output_layer_norm_weights, const IPortableTensor *aux_input,
+ const IPortableTensor *aux_input_to_input_weights,
+ const IPortableTensor *aux_input_to_forget_weights,
+ const IPortableTensor *aux_input_to_cell_weights,
+ const IPortableTensor *aux_input_to_output_weights, const IPortableTensor *input_gate_bias,
+ const IPortableTensor *forget_gate_bias, const IPortableTensor *cell_gate_bias,
+ const IPortableTensor *output_gate_bias, const IPortableTensor *projection_weights,
+ const IPortableTensor *projection_bias, const IPortableTensor *output_state_in,
+ const IPortableTensor *cell_state_in, const ir::operation::LSTM::Param ¶ms,
+ bool forward_sequence, bool time_major, int32_t output_offset, IPortableTensor *scratch_buffer,
+ IPortableTensor *output_state, IPortableTensor *cell_state, IPortableTensor *output,
+ bool has_output_state_data, bool has_cell_state_data);
void run() override;
nnfw::cker::SoftmaxParams op_params;
op_params.beta = _beta;
op_params.axis = _axis;
- nnfw::cker::LogSoftmax(op_params, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::LogSoftmax(op_params, getShape(_input), getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output));
}
void LogSoftMaxLayer::logsoftmaxQuant8()
op_params.beta = _beta;
op_params.axis = _axis;
op_params.table = _table;
- op_params.zero_point = _output->data_offset();
+ op_params.zero_point = _output->data_zero_point();
op_params.scale = _output->data_scale();
- nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getTensorShape(_input),
- reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getShape(_input),
+ getBuffer<uint8_t>(_input), getShape(_output),
+ getBuffer<uint8_t>(_output));
}
void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis,
{
MatrixBandPartLayer::MatrixBandPartLayer()
- : _input(nullptr), _num_lower_diag(nullptr), _num_upper_diag(nullptr), _output(nullptr)
+ : _input(nullptr), _num_lower_diag(nullptr), _num_upper_diag(nullptr), _output(nullptr)
{
// DO NOTHING
}
if (_num_lower_diag->data_type() == OperandType::INT64)
{
nnfw::cker::MatrixBandPart<int64_t>(
- *reinterpret_cast<const int64_t *>(_num_lower_diag->buffer()),
- *reinterpret_cast<const int64_t *>(_num_upper_diag->buffer()), getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ *getBuffer<int64_t>(_num_lower_diag), *getBuffer<int64_t>(_num_upper_diag), getShape(_input),
+ getBuffer<float>(_input), getShape(_output), getBuffer<float>(_output));
}
else
{
nnfw::cker::MatrixBandPart<int32_t>(
- *reinterpret_cast<const int32_t *>(_num_lower_diag->buffer()),
- *reinterpret_cast<const int32_t *>(_num_upper_diag->buffer()), getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ *getBuffer<int32_t>(_num_lower_diag), *getBuffer<int32_t>(_num_upper_diag), getShape(_input),
+ getBuffer<float>(_input), getShape(_output), getBuffer<float>(_output));
}
}
void MeanLayer::MeanFloat32()
{
- const auto inputShape = getTensorShape(_input);
+ const auto inputShape = getShape(_input);
const auto axisVec = getReducerAxes(_axes);
bool axis_is_1_and_2 =
- _keep_dims && inputShape.DimensionsCount() == 4 && axisVec.size() == 2 &&
- ((axisVec[0] == 1 && axisVec[1] == 2) || (axisVec[0] == 2 && axisVec[1] == 1));
+ _keep_dims && inputShape.DimensionsCount() == 4 && axisVec.size() == 2 &&
+ ((axisVec[0] == 1 && axisVec[1] == 2) || (axisVec[0] == 2 && axisVec[1] == 1));
if (axis_is_1_and_2)
{
- nnfw::cker::MeanAxis1And2(inputShape, reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::MeanAxis1And2(inputShape, getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output));
}
else
{
- nnfw::cker::Mean(inputShape, reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
- axisVec);
+ nnfw::cker::Mean(inputShape, getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output), axisVec);
}
}
void MeanLayer::MeanQuant8()
{
- nnfw::cker::MeanQ8Asymm(getTensorShape(_input),
- reinterpret_cast<const uint8_t *>(_input->buffer()), _input->data_scale(),
- _input->data_offset(), getTensorShape(_output),
- reinterpret_cast<uint8_t *>(_output->buffer()), _output->data_scale(),
- _output->data_offset(), getReducerAxes(_axes));
+ nnfw::cker::MeanQ8Asymm(getShape(_input), getBuffer<uint8_t>(_input), _input->data_scale(),
+ _input->data_zero_point(), getShape(_output), getBuffer<uint8_t>(_output),
+ _output->data_scale(), _output->data_zero_point(), getReducerAxes(_axes));
}
void MeanLayer::configure(const IPortableTensor *input, const IPortableTensor *axes,
{
// It assumes the index type is int32_t.
nnfw::cker::OneHot<T, int32_t>(
- *reinterpret_cast<const int32_t *>(_depth->buffer()),
- *reinterpret_cast<T *>(_on_value->buffer()), *reinterpret_cast<T *>(_off_value->buffer()),
- _axis, getTensorShape(_indices), reinterpret_cast<const int32_t *>(_indices->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+ *getBuffer<int32_t>(_depth), *getBuffer<T>(_on_value), *getBuffer<T>(_off_value), _axis,
+ getShape(_indices), getBuffer<int32_t>(_indices), getShape(_output), getBuffer<T>(_output));
}
void OneHotLayer::configure(const IPortableTensor *indices, const IPortableTensor *depth,
{
public:
OneHotLayer()
- : _indices(nullptr), _depth(nullptr), _on_value(nullptr), _off_value(nullptr),
- _output(nullptr), _axis(-1)
+ : _indices(nullptr), _depth(nullptr), _on_value(nullptr), _off_value(nullptr), _output(nullptr),
+ _axis(-1)
{
// DO NOTHING
}
uint32_t getNumberOfDimensions(const IPortableTensor *tensor)
{
assert(tensor);
- return tensor->num_dimensions();
+ return tensor->getShape().rank();
}
uint32_t getNumberOfElements(const IPortableTensor *tensor)
{
assert(tensor);
uint32_t count = 1;
- for (size_t i = 0; i < tensor->num_dimensions(); i++)
+ auto shape = tensor->getShape();
+ for (int i = 0; i < shape.rank(); i++)
{
- count *= tensor->dimension(i);
+ count *= shape.dim(i);
}
return count;
}
uint32_t getSizeOfDimension(const IPortableTensor *tensor, uint32_t dimensionIdx)
{
assert(tensor);
- if (dimensionIdx >= tensor->num_dimensions())
+ auto shape = tensor->getShape();
+ if (dimensionIdx >= static_cast<uint32_t>(shape.rank()))
{
// TODO Log the error
return 0;
}
- return tensor->dimension(dimensionIdx);
+ return shape.dim(dimensionIdx);
}
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
*multiplier = input_product_scale / output_scale;
}
+void GetQuantizedConvolutionMultipliersAndShifts(
+ float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size,
+ int num_channels, std::vector<int32_t> &per_channel_output_multiplier,
+ std::vector<int> &per_channel_output_shift)
+{
+ // Originates from tflite's PopulateConvolutionQuantizationParams()
+ per_channel_output_multiplier.resize(num_channels);
+ per_channel_output_shift.resize(num_channels);
+
+ const bool is_per_channel = filter_scales_size > 1;
+ auto per_channel_multiplier = per_channel_output_multiplier.data();
+ auto per_channel_shift = per_channel_output_shift.data();
+ for (int i = 0; i < num_channels; ++i)
+ {
+ // If a per-tensor quantization parameter is specified, broadcast it along the
+ // quantization dimension (channels_out).
+ const float scale = is_per_channel ? filter_scales[i] : filter_scales[0];
+ const double filter_scale = static_cast<double>(scale);
+ const double effective_output_scale =
+ static_cast<double>(input_scale) * filter_scale / static_cast<double>(output_scale);
+ int32_t significand;
+ int channel_shift;
+ QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
+ per_channel_multiplier[i] = significand;
+ per_channel_shift[i] = channel_shift;
+ }
+}
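+
+// Illustrative note: for each output channel the effective scale is
+//   effective_output_scale = input_scale * filter_scale / output_scale
+// and QuantizeMultiplier() decomposes it into a fixed-point significand and a power-of-two
+// shift (tflite convention), i.e. effective_output_scale ~= (significand / 2^31) * 2^shift.
+// For example (hypothetical values), input_scale = 0.5, filter_scale = 0.25, output_scale = 1.0
+// gives 0.125, stored as significand = 2^30 and shift = -2, since (2^30 / 2^31) * 2^-2 = 0.125.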
+
void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
int *left_shift)
{
*quantized_multiplier = static_cast<int32_t>(q_fixed);
}
-void CalculateActivationRangeUint8(ir::Activation activation, const IPortableTensor *output,
- int32_t *act_min, int32_t *act_max)
+void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output,
+ int32_t *act_min, int32_t *act_max)
{
- const int32_t qmin = std::numeric_limits<uint8_t>::min();
- const int32_t qmax = std::numeric_limits<uint8_t>::max();
+ int32_t qmin = 0;
+ int32_t qmax = 0;
+
+ switch (output->data_type())
+ {
+ case OperandType::QUANT_UINT8_ASYMM:
+ qmin = std::numeric_limits<uint8_t>::min();
+ qmax = std::numeric_limits<uint8_t>::max();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ case OperandType::QUANT_INT8_SYMM:
+ qmin = std::numeric_limits<int8_t>::min();
+ qmax = std::numeric_limits<int8_t>::max();
+ break;
+ default:
+ throw std::runtime_error("CalculateActivationRangeQuantized: Not supported operand type.");
+ }
+
const auto scale = output->data_scale();
- const auto zero_point = output->data_offset();
+ const auto zero_point = output->data_zero_point();
auto quantize = [scale, zero_point](float f) {
return zero_point + static_cast<int32_t>(std::round(f / scale));
};
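+ // Worked example (hypothetical numbers, for illustration only): with scale = 0.5 and
+ // zero_point = 10, quantize(0.0f) == 10 and quantize(6.0f) == 22, so a fused RELU6 would
+ // clamp the quantized activation range to [10, 22] within [qmin, qmax].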
if (getNumberOfDimensions(input1) != getNumberOfDimensions(input2))
return false;
+ auto shape1 = input1->getShape();
+ auto shape2 = input2->getShape();
for (uint32_t i = 0; i < getNumberOfDimensions(input1); i++)
- if (input1->dimension(i) != input2->dimension(i))
+ if (shape1.dim(i) != shape2.dim(i))
return false;
return true;
{
std::vector<int32_t> ret;
+ auto axes_vals = (axes->getShape().rank() == 0) ? 1 : axes->getShape().dim(0);
assert(axes->layout() == ir::Layout::NHWC);
- assert(axes->dimension(0) == axes->getShape().num_elements());
+ assert(static_cast<size_t>(axes_vals) == axes->getShape().num_elements());
switch (axes->data_type())
{
case ir::DataType::INT32:
{
- for (size_t i = 0; i < axes->dimension(0); ++i)
- ret.emplace_back(*(reinterpret_cast<const int32_t *>(axes->buffer()) + i));
+ for (int i = 0; i < axes_vals; ++i)
+ ret.emplace_back(*(getBuffer<int32_t>(axes) + i));
break;
}
case ir::DataType::INT64:
{
- for (size_t i = 0; i < axes->dimension(0); ++i)
- ret.emplace_back(*(reinterpret_cast<const int64_t *>(axes->buffer()) + i));
+ for (int i = 0; i < axes_vals; ++i)
+ ret.emplace_back(*(getBuffer<int64_t>(axes) + i));
break;
}
default:
assert(tensor);
const int32_t extended_rank = 4;
int32_t raw_shape[extended_rank];
- uint32_t src = extended_rank - tensor->num_dimensions();
+ auto shape = tensor->getShape();
+ uint32_t src = extended_rank - shape.rank();
for (uint32_t i = 0; i < extended_rank; ++i)
{
if (i < src)
}
else
{
- raw_shape[i] = tensor->dimension(i - src);
+ raw_shape[i] = shape.dim(i - src);
}
}
return nnfw::cker::Shape(extended_rank, raw_shape);
}
-inline nnfw::cker::Shape getTensorShape(const IPortableTensor *tensor)
+inline nnfw::cker::Shape getShape(const IPortableTensor *tensor)
{
if (tensor == nullptr)
return nnfw::cker::Shape();
void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
int *left_shift);
+void GetQuantizedConvolutionMultipliersAndShifts(
+ float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size,
+ int num_channels, std::vector<int32_t> &per_channel_output_multiplier,
+ std::vector<int> &per_channel_output_shift);
+
template <typename T>
void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
{
}
}
-void CalculateActivationRangeUint8(ir::Activation activation, const IPortableTensor *output,
- int32_t *act_min, int32_t *act_max);
+void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output,
+ int32_t *act_min, int32_t *act_max);
bool HaveSameShapes(const IPortableTensor *input1, const IPortableTensor *input2);
std::vector<int32_t> getReducerAxes(const IPortableTensor *axes);
+template <typename T> const T *getBuffer(const IPortableTensor *tensor)
+{
+ return reinterpret_cast<const T *>(tensor->buffer());
+}
+
+template <typename T> T *getBuffer(IPortableTensor *tensor)
+{
+ return reinterpret_cast<T *>(tensor->buffer());
+}
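+
+// Usage sketch (illustration only): these helpers replace the reinterpret_cast calls on
+// tensor->buffer() used above; const-ness follows the tensor pointer:
+//   const float *in = getBuffer<float>(_input);   // const IPortableTensor* -> const T*
+//   float *out = getBuffer<float>(_output);       // IPortableTensor*       -> T*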
+
} // namespace ops
} // namespace cpu
} // namespace backend
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims.push_back(getTensorShape(_inputs[i]));
+ inputDims.push_back(getShape(_inputs[i]));
inputDimsPtr.push_back(&inputDims[i]);
}
for (const auto input : _inputs)
{
- inputPtrs.emplace_back(reinterpret_cast<const T *>(input->buffer()));
+ inputPtrs.emplace_back(getBuffer<T>(input));
}
- nnfw::cker::Pack<T>(op_params, inputPtrs.data(), getTensorShape(_output),
- reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::Pack<T>(op_params, inputPtrs.data(), getShape(_output), getBuffer<T>(_output));
}
void PackLayer::configure(const std::vector<const IPortableTensor *> &inputs, int32_t axis,
{
PadLayer::PadLayer()
- : _input(nullptr), _output(nullptr), _padData(), _padRank(), _constantValueData()
+ : _input(nullptr), _output(nullptr), _padData(), _padRank(), _constantValueData()
{
// DO NOTHING
}
template <typename T> void PadLayer::padImpl(const T *constant_value_data)
{
- nnfw::cker::Pad<T>(_padData, _padRank, getTensorShape(_input),
- reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<T *>(_output->buffer()), constant_value_data);
+ nnfw::cker::Pad<T>(_padData, _padRank, getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output), constant_value_data);
}
void PadLayer::configure(const IPortableTensor *input, IPortableTensor *output,
void PadLayer::run()
{
- if (_input->data_type() == OperandType::FLOAT32)
+ switch (_input->data_type())
{
- padImpl<float>(_constantValueData.f);
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- if (_constantValueData.u8 == nullptr)
- {
- uint8_t pad_value = static_cast<uint8_t>(_output->data_offset());
- padImpl<uint8_t>(&pad_value);
- }
- else
- {
- padImpl<uint8_t>(_constantValueData.u8);
- }
- }
- else
- {
- throw std::runtime_error{"Pad: unsupported data type"};
+ case OperandType::FLOAT32:
+ padImpl<float>(_constantValueData.f);
+ break;
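+ // For the quantized cases below, when no constant value is given we pad with the output
+ // zero point, i.e. the quantized representation of 0.0f.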
+ case OperandType::QUANT_UINT8_ASYMM:
+ if (_constantValueData.u8 == nullptr)
+ {
+ uint8_t pad_value = static_cast<uint8_t>(_output->data_zero_point());
+ padImpl<uint8_t>(&pad_value);
+ }
+ else
+ {
+ padImpl<uint8_t>(_constantValueData.u8);
+ }
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ if (_constantValueData.i8 == nullptr)
+ {
+ int8_t pad_value = static_cast<int8_t>(_output->data_zero_point());
+ padImpl<int8_t>(&pad_value);
+ }
+ else
+ {
+ padImpl<int8_t>(_constantValueData.i8);
+ }
+ break;
+ default:
+ throw std::runtime_error{"Pad: unsupported data type"};
}
}
void avgPool2D(const nnfw::cker::PoolParams ¶ms, const IPortableTensor *input,
IPortableTensor *output)
{
- nnfw::cker::AveragePool<T>(params, getTensorShape(input),
- reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::AveragePool<T>(params, getShape(input), getBuffer<T>(input), getShape(output),
+ getBuffer<T>(output));
}
template <typename T>
void maxPool2D(const nnfw::cker::PoolParams ¶ms, const IPortableTensor *input,
IPortableTensor *output)
{
- nnfw::cker::MaxPool<T>(params, getTensorShape(input),
- reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::MaxPool<T>(params, getShape(input), getBuffer<T>(input), getShape(output),
+ getBuffer<T>(output));
}
template <typename T>
_output = output;
POOLING_PARAMETERS
- if (_input->data_type() == OperandType::FLOAT32)
- {
- float output_activation_min = 0;
- float output_activation_max = 0;
- CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max);
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
- _kernel = generateKernelGeneric<float>(op_params, op_type);
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(activation, _output, &output_activation_min,
- &output_activation_max);
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
- _kernel = generateKernelGeneric<uint8_t>(op_params, op_type);
- }
- else
+ switch (_input->data_type())
{
- throw std::runtime_error{"Pool: unsupported data type"};
+ case OperandType::FLOAT32:
+ {
+ float output_activation_min = 0;
+ float output_activation_max = 0;
+ CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ _kernel = generateKernelGeneric<float>(op_params, op_type);
+ break;
+ }
+ case OperandType::QUANT_UINT8_ASYMM:
+ {
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ _kernel = generateKernelGeneric<uint8_t>(op_params, op_type);
+ break;
+ }
+ case OperandType::QUANT_INT8_ASYMM:
+ {
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ _kernel = generateKernelGeneric<int8_t>(op_params, op_type);
+ break;
+ }
+ default:
+ throw std::runtime_error{"Pool: unsupported data type"};
}
}
if (!HaveSameShapes(_lhs, _rhs))
{
nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::POW>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ op_params, getShape(_lhs), getBuffer<float>(_lhs), getShape(_rhs), getBuffer<float>(_rhs),
+ getShape(_output), getBuffer<float>(_output));
return;
}
- nnfw::cker::powImpl(getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::powImpl(getShape(_lhs), getBuffer<float>(_lhs), getShape(_rhs),
+ getBuffer<float>(_rhs), getShape(_output), getBuffer<float>(_output));
}
void PowLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationUtils.h"
+#include "QuantizeLayer.h"
+
+#include <cker/operation/Dequantize.h>
+#include <cker/operation/Erf.h>
+#include <cker/operation/Exp.h>
+#include <cker/operation/LogicalNot.h>
+#include <cker/operation/Quantize.h>
+#include <cker/operation/Round.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+template <typename InputT, typename OutputT>
+void affineQuantize(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Quantize(getShape(input), getBuffer<InputT>(input), getShape(output),
+ getBuffer<OutputT>(output), output->data_scale(), output->data_zero_point());
+}
+
+void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _output = output;
+
+ if ((_input->data_type() == OperandType::FLOAT32))
+ {
+ // DO NOTHING
+ }
+ else if (((input->data_type() == OperandType::QUANT_UINT8_ASYMM) &&
+ (output->data_type() == OperandType::QUANT_INT8_ASYMM)) ||
+ ((input->data_type() == OperandType::QUANT_INT8_ASYMM) &&
+ (output->data_type() == OperandType::QUANT_UINT8_ASYMM)))
+ {
+ const double effective_output_scale =
+ static_cast<double>(input->data_scale()) / static_cast<double>(output->data_scale());
+ QuantizeMultiplier(effective_output_scale, &_output_multiplier, &_output_shift);
+ }
+ else
+ {
+ throw std::runtime_error{"Quantize: Unsupported data type"};
+ }
+}
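+
+// Note (illustration, following the tflite-style requantization implemented by cker::Requantize):
+// run() maps each element roughly as
+//   out = output_zero_point + (in - input_zero_point) * (input_scale / output_scale)
+// where input_scale / output_scale is the effective_output_scale decomposed above into the
+// fixed-point (_output_multiplier, _output_shift) pair.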
+
+void QuantizeLayer::run()
+{
+ if ((_input->data_type() == OperandType::FLOAT32))
+ {
+ affineQuantize<float, uint8_t>(_input, _output);
+ }
+ else if ((_input->data_type() == OperandType::QUANT_UINT8_ASYMM) &&
+ (_output->data_type() == OperandType::QUANT_INT8_ASYMM))
+ {
+ nnfw::cker::Requantize<uint8_t, int8_t>(
+ getBuffer<uint8_t>(_input), MatchingFlatSize(getShape(_input), getShape(_output)),
+ _output_multiplier, _output_shift, _input->data_zero_point(), _output->data_zero_point(),
+ getBuffer<int8_t>(_output));
+ }
+ else if ((_input->data_type() == OperandType::QUANT_INT8_ASYMM) &&
+ (_output->data_type() == OperandType::QUANT_UINT8_ASYMM))
+ {
+ nnfw::cker::Requantize<int8_t, uint8_t>(
+ getBuffer<int8_t>(_input), MatchingFlatSize(getShape(_input), getShape(_output)),
+ _output_multiplier, _output_shift, _input->data_zero_point(), _output->data_zero_point(),
+ getBuffer<uint8_t>(_output));
+ }
+ else
+ {
+ throw std::runtime_error{"Quantize: Unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+class QuantizeLayer : public ::onert::exec::IFunction
+{
+public:
+ QuantizeLayer() : _input(nullptr), _output(nullptr), _output_multiplier(0), _output_shift(0)
+ {
+ // DO NOTHING
+ }
+
+public:
+ void configure(const IPortableTensor *input, IPortableTensor *output);
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+ int32_t _output_multiplier;
+ int _output_shift;
+};
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
switch (_output->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::Range<float>(reinterpret_cast<float *>(_start->buffer()),
- reinterpret_cast<float *>(_limit->buffer()),
- reinterpret_cast<float *>(_delta->buffer()),
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Range<float>(getBuffer<float>(_start), getBuffer<float>(_limit),
+ getBuffer<float>(_delta), getBuffer<float>(_output));
break;
case OperandType::INT32:
- nnfw::cker::Range<int32_t>(reinterpret_cast<int32_t *>(_start->buffer()),
- reinterpret_cast<int32_t *>(_limit->buffer()),
- reinterpret_cast<int32_t *>(_delta->buffer()),
- reinterpret_cast<int32_t *>(_output->buffer()));
+ nnfw::cker::Range<int32_t>(getBuffer<int32_t>(_start), getBuffer<int32_t>(_limit),
+ getBuffer<int32_t>(_delta), getBuffer<int32_t>(_output));
break;
default:
throw std::runtime_error{"Range: unsupported data type"};
void RankLayer::run()
{
- int32_t *output_data = reinterpret_cast<int32_t *>(_output->buffer());
- output_data[0] = _input->num_dimensions();
+ int32_t *output_data = getBuffer<int32_t>(_output);
+ output_data[0] = _input->getShape().rank();
}
} // namespace ops
bool keep_dims, T init_value, nnfw::cker::Reduce &reduce_kernel,
T reducer(const T current, const T in))
{
- reduce_kernel.prepare(input->num_dimensions(), axes.size());
- bool result = reduce_kernel.ReduceGeneric<T>(
- getTensorShape(input), reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()), axes, keep_dims, init_value, reducer);
+ reduce_kernel.prepare(input->getShape().rank(), axes.size());
+ bool result =
+ reduce_kernel.ReduceGeneric<T>(getShape(input), getBuffer<T>(input), getShape(output),
+ getBuffer<T>(output), axes, keep_dims, init_value, reducer);
if (!result)
{
break;
case ReduceType::kMax:
return std::bind(
- &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
- keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
- [](const T current, const T in) -> T { return (in > current) ? in : current; });
+ &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
+ [](const T current, const T in) -> T { return (in > current) ? in : current; });
break;
case ReduceType::kMin:
return std::bind(
- &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
- keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
- [](const T current, const T in) -> T { return (in < current) ? in : current; });
+ &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
+ [](const T current, const T in) -> T { return (in < current) ? in : current; });
break;
default:
throw std::runtime_error{"Reduce: Unsupported reduce type"};
nnfw::cker::Reduce &reduce_kernel)
{
const bool same_scale = (input->data_scale() == output->data_scale() &&
- input->data_offset() == output->data_offset());
+ input->data_zero_point() == output->data_zero_point());
- reduce_kernel.prepare(input->num_dimensions(), axes.size());
+ reduce_kernel.prepare(input->getShape().rank(), axes.size());
if (!same_scale)
{
std::vector<int32_t> temp_sum(output->getShape().num_elements());
bool result = reduce_kernel.QuantizedMeanOrSum<uint8_t, int32_t>(
- reinterpret_cast<const uint8_t *>(input->buffer()), input->data_offset(),
- input->data_scale(), getTensorShape(input), reinterpret_cast<uint8_t *>(output->buffer()),
- output->data_offset(), output->data_scale(), getTensorShape(output), axes, keep_dims,
- temp_sum.data(), true, [](const int32_t current, const uint8_t in) -> int32_t {
- const int32_t actual_in = static_cast<int32_t>(in);
- return current + actual_in;
- });
+ getBuffer<uint8_t>(input), input->data_zero_point(), input->data_scale(), getShape(input),
+ getBuffer<uint8_t>(output), output->data_zero_point(), output->data_scale(), getShape(output),
+ axes, keep_dims, temp_sum.data(), true,
+ [](const int32_t current, const uint8_t in) -> int32_t {
+ const int32_t actual_in = static_cast<int32_t>(in);
+ return current + actual_in;
+ });
if (!result)
{
} // namespace
ReduceLayer::ReduceLayer()
- : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()),
- _kernel(), _reduceType(ReduceType::kInvalid)
+ : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()),
+ _kernel(), _reduceType(ReduceType::kInvalid)
{
// DO NOTHING
}
{
const auto axes = getReducerAxes(_axes);
#ifdef USE_NEON
- int32_t rank = _input->num_dimensions();
+ int32_t rank = _input->getShape().rank();
if (_input->data_type() == ir::DataType::FLOAT32 && _reduceType == ReduceType::kSum &&
axes.size() == 1 && (axes[0] == -1 || axes[0] == rank - 1))
{
- OptimizedReduceSum(reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_input),
- reinterpret_cast<float *>(_output->buffer()));
+ OptimizedReduceSum(getBuffer<float>(_input), getShape(_input), getBuffer<float>(_output));
return;
}
#endif // NEON
std::unique_ptr<nnfw::cker::Reduce> _reduce_kernel;
std::function<void(const IPortableTensor *input, IPortableTensor *output,
const std::vector<int> &axes)>
- _kernel;
+ _kernel;
ReduceType _reduceType;
};
{
ResizeBilinearLayer::ResizeBilinearLayer()
- : _input(nullptr), _output(nullptr), _size(nullptr), _output_height(0), _output_width(0),
- _align_corners(false), _half_pixel_centers(false)
+ : _input(nullptr), _output(nullptr), _size(nullptr), _output_height(0), _output_width(0),
+ _align_corners(false), _half_pixel_centers(false)
{
// DO NOTHING
}
}
else
{
- const auto size_buf = reinterpret_cast<const int32_t *>(_size->buffer());
+ const auto size_buf = getBuffer<int32_t>(_size);
params.output_height = size_buf[0];
params.output_width = size_buf[1];
}
switch (_input->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::ResizeBilinear(
- params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::ResizeBilinear(params, getShape(_input), getBuffer<float>(_input),
+ getShape(_output), getBuffer<float>(_output));
break;
case OperandType::QUANT_UINT8_ASYMM:
- nnfw::cker::ResizeBilinear(
- params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ nnfw::cker::ResizeBilinear(params, getShape(_input), getBuffer<uint8_t>(_input),
+ getShape(_output), getBuffer<uint8_t>(_output));
+ break;
+
+ case OperandType::QUANT_INT8_ASYMM:
+ nnfw::cker::ResizeBilinear(params, getShape(_input), getBuffer<int8_t>(_input),
+ getShape(_output), getBuffer<int8_t>(_output));
break;
case OperandType::UINT8:
{
throw std::runtime_error{"Reverse: only support 1 axis"};
}
- int32_t axis = *(reinterpret_cast<int32_t *>(_axis->buffer()));
+ int32_t axis = *getBuffer<int32_t>(_axis);
if (axis < 0)
{
- axis += _input->num_dimensions();
+ axis += _input->getShape().rank();
}
switch (_input->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::Reverse<float>(
- axis, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Reverse<float>(axis, getShape(_input), getBuffer<float>(_input),
+ getShape(_output), getBuffer<float>(_output));
break;
default:
throw std::runtime_error{"Reverse: unsupported data type"};
{
SelectLayer::SelectLayer()
- : _cond(nullptr), _input_true(nullptr), _input_false(nullptr), _output(nullptr)
+ : _cond(nullptr), _input_true(nullptr), _input_false(nullptr), _output(nullptr)
{
// DO NOTHING
}
void SelectLayer::run()
{
-#define KERNEL_SELECT(type, op) \
- nnfw::cker::op(getTensorShape(_cond), reinterpret_cast<uint8_t *>(_cond->buffer()), \
- getTensorShape(_input_true), reinterpret_cast<type *>(_input_true->buffer()), \
- getTensorShape(_input_false), reinterpret_cast<type *>(_input_false->buffer()), \
- getTensorShape(_output), reinterpret_cast<type *>(_output->buffer()));
+#define KERNEL_SELECT(type, op) \
+ nnfw::cker::op(getShape(_cond), getBuffer<uint8_t>(_cond), getShape(_input_true), \
+ getBuffer<type>(_input_true), getShape(_input_false), \
+ getBuffer<type>(_input_false), getShape(_output), getBuffer<type>(_output));
#define KERNEL_SWITCH(type, op) \
switch (type) \
auto input_type = _input_true->data_type();
bool require_broadcast =
- !HaveSameShapes(_input_true, _cond) || !HaveSameShapes(_input_false, _cond);
- bool rank_one_select = ((_input_true->num_dimensions() == 1) && !require_broadcast);
+ !HaveSameShapes(_input_true, _cond) || !HaveSameShapes(_input_false, _cond);
+ bool rank_one_select = ((_input_true->getShape().rank() == 1) && !require_broadcast);
if (rank_one_select)
{
template <typename T> void GetRawShape(const IPortableTensor *input, T *output_data)
{
- for (uint32_t i = 0; i < input->num_dimensions(); ++i)
+ auto shape = input->getShape();
+ for (int i = 0; i < shape.rank(); ++i)
{
- output_data[i] = static_cast<T>(input->dimension(i));
+ output_data[i] = static_cast<T>(shape.dim(i));
}
}
{
if (_output->data_type() == OperandType::UINT32)
{
- GetRawShape(_input, reinterpret_cast<uint32_t *>(_output->buffer()));
+ GetRawShape(_input, getBuffer<uint32_t>(_output));
}
else if (_output->data_type() == OperandType::INT32)
{
- GetRawShape(_input, reinterpret_cast<int32_t *>(_output->buffer()));
+ GetRawShape(_input, getBuffer<int32_t>(_output));
}
else if (_output->data_type() == OperandType::INT64)
{
- GetRawShape(_input, reinterpret_cast<int64_t *>(_output->buffer()));
+ GetRawShape(_input, getBuffer<int64_t>(_output));
}
else
{
{
for (int idx = dimensions - 1; idx >= 0; --idx)
{
- begins->push_back(reinterpret_cast<T *>(begin->buffer())[idx]);
- sizes->push_back(reinterpret_cast<T *>(size->buffer())[idx]);
+ begins->push_back(getBuffer<T>(begin)[idx]);
+ sizes->push_back(getBuffer<T>(size)[idx]);
}
}
begins.reserve(kMaxDim);
sizes.reserve(kMaxDim);
- GetBeginAndSizeVectors<int32_t>(_input->num_dimensions(), _begin, _size, &begins, &sizes);
+ if (_begin->data_type() == OperandType::INT32)
+ {
+ GetBeginAndSizeVectors<int32_t>(_input->getShape().rank(), _begin, _size, &begins, &sizes);
+ }
+ else if (_begin->data_type() == OperandType::INT64)
+ {
+ GetBeginAndSizeVectors<int64_t>(_input->getShape().rank(), _begin, _size, &begins, &sizes);
+ }
+ else
+ {
+ throw std::runtime_error{"Slice: unsupported begin and/or size data type"};
+ }
// begins : 0-based, sizes : 1-based
- for (int i = _input->num_dimensions(); i < kMaxDim; ++i)
+ for (int i = _input->getShape().rank(); i < kMaxDim; ++i)
{
begins.push_back(0);
sizes.push_back(1);
op_params.size[i] = sizes[3 - i];
}
- nnfw::cker::Slice(op_params, getExtendedTensorShape(_input),
- reinterpret_cast<const T *>(_input->buffer()),
- reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::Slice(op_params, getExtendedTensorShape(_input), getBuffer<T>(_input),
+ getBuffer<T>(_output));
}
void SliceLayer::configure(const IPortableTensor *input, const IPortableTensor *begin,
if (getNumberOfDimensions(_input) == 1)
{
uint32_t input_size = getNumberOfElements(_input);
- nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, 1, _beta,
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Softmax(getBuffer<float>(_input), input_size, 1, _beta, getBuffer<float>(_output));
}
else if (getNumberOfDimensions(_input) == 2)
{
throw std::runtime_error("batch_size should not be 0");
uint32_t input_size = getNumberOfElements(_input) / batch_size;
- nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size,
- _beta, reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Softmax(getBuffer<float>(_input), input_size, batch_size, _beta,
+ getBuffer<float>(_output));
}
else if (getNumberOfDimensions(_input) == 4)
{
nnfw::cker::SoftmaxParams op_params;
op_params.beta = _beta;
- nnfw::cker::Softmax(op_params, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Softmax(op_params, getShape(_input), getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output));
}
else
{
nnfw::cker::SoftmaxParams op_params;
op_params.beta = _beta;
- nnfw::cker::reference::Softmax(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::reference::Softmax(op_params, getShape(_input), getBuffer<float>(_input),
+ getShape(_output), getBuffer<float>(_output));
}
}
-void SoftMaxLayer::softmaxQuant8()
+template <typename T> void SoftMaxLayer::softmaxQuant8()
{
- nnfw::cker::Shape descrIn4D(4);
-
- if (getNumberOfDimensions(_input) == 2)
- {
- auto batch_size = getSizeOfDimension(_input, 0);
- if (batch_size == 0)
- throw std::runtime_error("batch_size should not be 0");
-
- auto input_size = getNumberOfElements(_input) / batch_size;
- descrIn4D.SetDim(0, batch_size);
- descrIn4D.SetDim(1, 1);
- descrIn4D.SetDim(2, 1);
- descrIn4D.SetDim(3, input_size);
- }
- else if (getNumberOfDimensions(_input) == 4)
- {
- descrIn4D.SetDim(0, _input->dimension(0));
- descrIn4D.SetDim(1, _input->dimension(1));
- descrIn4D.SetDim(2, _input->dimension(2));
- descrIn4D.SetDim(3, _input->dimension(3));
- }
- else
- {
- throw std::runtime_error{"only 2D and 4D tensors supported"};
- }
- if (_output->data_offset() != 0 || _output->data_scale() != 1.f / 256)
- {
- throw std::runtime_error{"incorrect scale / offset for output"};
- }
- static const int32_t kScaledDiffIntegerBits = 5;
- const double input_beta_real_multiplier = std::min(
- 1.0 * _beta * _input->data_scale() * (1 << (31 - kScaledDiffIntegerBits)), (1ll << 31) - 1.0);
- int32_t input_multiplier = 0;
- int32_t input_left_shift = 0;
- QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, &input_multiplier,
- &input_left_shift);
- float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
-
nnfw::cker::SoftmaxParams op_params;
- op_params.input_multiplier = input_multiplier;
- op_params.input_left_shift = input_left_shift;
- op_params.diff_min = diff_min;
- nnfw::cker::Softmax(op_params, descrIn4D, reinterpret_cast<const uint8_t *>(_input->buffer()),
- descrIn4D, reinterpret_cast<uint8_t *>(_output->buffer()));
+ op_params.scale = _output->data_scale();
+ op_params.zero_point = _output->data_zero_point();
+ op_params.uint8_table1 = _uint8_table1;
+ op_params.uint8_table2 = _uint8_table2;
+ op_params.table = _table;
+
+#ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
+ nnfw::cker::SoftmaxInt8LUT<T, T>(op_params, getShape(_input), getBuffer<T>(_input),
+ getShape(_output), getBuffer<T>(_output));
+#else
+ nnfw::cker::Softmax<T, T>(op_params, getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output));
+#endif
}
void SoftMaxLayer::configure(const IPortableTensor *input, const float beta,
_input = input;
_output = output;
_beta = beta;
+
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM ||
+ _input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+#ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
+ // Only applied when both input & output are uint8/int8 and built with clang
+ // on aarch64.
+ nnfw::cker::PopulateSoftmaxUInt8LookupTable(_uint8_table1, _uint8_table2, _input->data_scale(),
+ _beta);
+#else
+ nnfw::cker::PopulateSoftmaxLookupTable(_table, _input->data_scale(), _beta);
+#endif
+ }
}
void SoftMaxLayer::run()
{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- softmaxFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- softmaxQuant8();
- }
- else
+ switch (_input->data_type())
{
- throw std::runtime_error{"SoftMax: unsupported data type"};
+ case OperandType::FLOAT32:
+ softmaxFloat32();
+ break;
+ case OperandType::QUANT_UINT8_ASYMM:
+ softmaxQuant8<uint8_t>();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ softmaxQuant8<int8_t>();
+ break;
+ default:
+ throw std::runtime_error{"SoftMax: unsupported data type"};
}
}
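A minimal, self-contained sketch of the dispatch pattern used in SoftMaxLayer::run() above: a run-time OperandType switch selects a templated quantized kernel, so a single body serves both uint8 and int8. All names below (OperandKind, quantSoftmaxSketch, runSketch) are hypothetical stand-ins; the real kernels are the nnfw::cker LUT/softmax routines called from softmaxQuant8<T>().

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <vector>

enum class OperandKind
{
  FLOAT32,
  QUANT_UINT8_ASYMM,
  QUANT_INT8_ASYMM
};

// One templated body mirrors softmaxQuant8<T>(): dequantize, take a numerically
// stable softmax in float, then requantize into the output type.
template <typename T>
void quantSoftmaxSketch(const std::vector<T> &in, std::vector<T> &out, float in_scale,
                        float out_scale, int32_t out_zero_point)
{
  float max_v = -std::numeric_limits<float>::infinity();
  for (T v : in)
    max_v = std::max(max_v, in_scale * static_cast<float>(v));
  std::vector<float> e(in.size());
  float sum = 0.f;
  for (std::size_t i = 0; i < in.size(); ++i)
  {
    e[i] = std::exp(in_scale * static_cast<float>(in[i]) - max_v);
    sum += e[i];
  }
  for (std::size_t i = 0; i < in.size(); ++i)
  {
    int32_t q = static_cast<int32_t>(std::lround(e[i] / (sum * out_scale))) + out_zero_point;
    q = std::max<int32_t>(std::numeric_limits<T>::min(),
                          std::min<int32_t>(std::numeric_limits<T>::max(), q));
    out[i] = static_cast<T>(q);
  }
}

// Run-time type switch -> compile-time template instantiation, as in run() above.
void runSketch(OperandKind kind)
{
  switch (kind)
  {
    case OperandKind::QUANT_UINT8_ASYMM:
    {
      std::vector<uint8_t> in{0, 128, 255}, out(3);
      quantSoftmaxSketch<uint8_t>(in, out, /*in_scale=*/0.1f, /*out_scale=*/1.f / 256, /*zp=*/0);
      break;
    }
    case OperandKind::QUANT_INT8_ASYMM:
    {
      std::vector<int8_t> in{-128, 0, 127}, out(3);
      quantSoftmaxSketch<int8_t>(in, out, /*in_scale=*/0.1f, /*out_scale=*/1.f / 256, /*zp=*/-128);
      break;
    }
    default:
      throw std::runtime_error{"sketch: unsupported data type"};
  }
}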
public:
void softmaxFloat32();
- void softmaxQuant8();
+ template <typename T> void softmaxQuant8();
void configure(const IPortableTensor *input, const float beta, IPortableTensor *output);
IPortableTensor *_output;
float _beta;
+
+ float _table[256];
+ uint8_t _uint8_table1[256];
+ uint8_t _uint8_table2[256];
};
} // namespace ops
namespace ops
{
SpaceToBatchNDLayer::SpaceToBatchNDLayer()
- : _input(nullptr), _block_shape(nullptr), _padding(nullptr), _output(nullptr)
+ : _input(nullptr), _block_shape(nullptr), _padding(nullptr), _output(nullptr)
{
// DO NOTHING
}
void SpaceToBatchNDLayer::checkDimension()
{
const int kSpatialDimensionNum = 2;
- if (_block_shape->dimension(0) != kSpatialDimensionNum)
+ if (_block_shape->getShape().dim(0) != kSpatialDimensionNum)
{
throw std::runtime_error("SpaceToBatchND : block_shape(block_size) tensor's rank is wrong\n");
}
// shape height and width.
for (int dim = 0; dim < kSpatialDimensionNum; ++dim)
{
- int final_dim_size =
- (_input->dimension(dim + 1) + reinterpret_cast<int32_t *>(_padding->buffer())[dim * 2] +
- reinterpret_cast<int32_t *>(_padding->buffer())[dim * 2 + 1]);
+ int final_dim_size = (_input->getShape().dim(dim + 1) + getBuffer<int32_t>(_padding)[dim * 2] +
+ getBuffer<int32_t>(_padding)[dim * 2 + 1]);
- if (final_dim_size % reinterpret_cast<int32_t *>(_block_shape->buffer())[dim] != 0)
+ if (final_dim_size % getBuffer<int32_t>(_block_shape)[dim] != 0)
{
throw std::runtime_error(
- "SpaceToBatchND : padded input's dimension is not a multiple of block size\n");
+ "SpaceToBatchND : padded input's dimension is not a multiple of block size\n");
}
- if ((int32_t)_output->dimension(dim + 1) !=
- final_dim_size / reinterpret_cast<int32_t *>(_block_shape->buffer())[dim])
+ if ((int32_t)_output->getShape().dim(dim + 1) !=
+ final_dim_size / getBuffer<int32_t>(_block_shape)[dim])
{
throw std::runtime_error("SpaceToBatchND : wrong output dimension\n");
}
}
template <> uint32_t SpaceToBatchNDLayer::getPad<float>() { return 0; }
-template <> uint32_t SpaceToBatchNDLayer::getPad<uint8_t>() { return _output->data_offset(); }
+template <> uint32_t SpaceToBatchNDLayer::getPad<uint8_t>() { return _output->data_zero_point(); }
template <typename T> void SpaceToBatchNDLayer::spaceToBatchND()
{
nnfw::cker::SpaceToBatchParams params;
params.output_offset = getPad<T>();
- nnfw::cker::SpaceToBatchND(
- params, getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
- getTensorShape(_block_shape), reinterpret_cast<const int32_t *>(_block_shape->buffer()),
- getTensorShape(_padding), reinterpret_cast<const int32_t *>(_padding->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::SpaceToBatchND(params, getShape(_input), getBuffer<T>(_input), getShape(_block_shape),
+ getBuffer<int32_t>(_block_shape), getShape(_padding),
+ getBuffer<int32_t>(_padding), getShape(_output),
+ getBuffer<T>(_output));
}
void SpaceToBatchNDLayer::configure(const IPortableTensor *input,
nnfw::cker::SpaceToDepthParams params;
params.block_size = _block_size;
- nnfw::cker::SpaceToDepth(params, getTensorShape(_input),
- reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::SpaceToDepth(params, getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output));
}
void SpaceToDepthLayer::configure(const IPortableTensor *input, const int32_t block_size,
{
throw std::runtime_error("ArgMinMax: wrong shape of axis");
}
- auto axis = *reinterpret_cast<const int32_t *>(_axis->buffer());
+ auto axis = *getBuffer<int32_t>(_axis);
if (axis < 0)
{
- axis += _input->num_dimensions();
+ axis += _input->getShape().rank();
}
op_params.axis = axis;
op_params.num_split = _num_splits;
for (const auto output : _outputs)
{
assert(output->total_size() == sizeOfData(output->data_type(), output->getShape().dims()));
- outputPtrs.emplace_back(reinterpret_cast<T *>(output->buffer()));
+ outputPtrs.emplace_back(getBuffer<T>(output));
}
assert(_input->total_size() == sizeOfData(_input->data_type(), _input->getShape().dims()));
- nnfw::cker::Split<T>(op_params, getTensorShape(_input), reinterpret_cast<T *>(_input->buffer()),
- getTensorShape(_outputs[0]), outputPtrs.data());
+ nnfw::cker::Split<T>(op_params, getShape(_input), getBuffer<T>(_input), getShape(_outputs[0]),
+ outputPtrs.data());
}
void SplitLayer::configure(const IPortableTensor *input, const IPortableTensor *axis,
{
SplitVLayer::SplitVLayer()
- : _input(nullptr), _size_splits(nullptr), _split_dim(nullptr), _num_splits(0), _outputs()
+ : _input(nullptr), _size_splits(nullptr), _split_dim(nullptr), _num_splits(0), _outputs()
{
// DO NOTHING
}
template <typename T> void SplitVLayer::splitV(void)
{
nnfw::cker::SplitVParams op_params;
- op_params.axis = *(reinterpret_cast<const int32_t *>(_split_dim->buffer()));
+ op_params.axis = *getBuffer<int32_t>(_split_dim);
op_params.num_split = _num_splits;
std::vector<T *> outputPtrs;
for (const auto output : _outputs)
{
assert(output->total_size() == sizeOfData(output->data_type(), output->getShape().dims()));
- outputPtrs.emplace_back(reinterpret_cast<T *>(output->buffer()));
- outshape.emplace_back(getTensorShape(output));
+ outputPtrs.emplace_back(getBuffer<T>(output));
+ outshape.emplace_back(getShape(output));
}
assert(_input->total_size() == sizeOfData(_input->data_type(), _input->getShape().dims()));
- nnfw::cker::SplitV<T>(op_params, getTensorShape(_input), reinterpret_cast<T *>(_input->buffer()),
- outshape, outputPtrs.data());
+ nnfw::cker::SplitV<T>(op_params, getShape(_input), getBuffer<T>(_input), outshape,
+ outputPtrs.data());
}
void SplitVLayer::configure(const IPortableTensor *input, const IPortableTensor *size_splits,
void SqDiffLayer::SqDiffFloat32()
{
- nnfw::cker::SqDiff(getTensorShape(_input1), reinterpret_cast<const float *>(_input1->buffer()),
- getTensorShape(_input2), reinterpret_cast<const float *>(_input2->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::SqDiff(getShape(_input1), getBuffer<float>(_input1), getShape(_input2),
+ getBuffer<float>(_input2), getShape(_output), getBuffer<float>(_output));
}
void SqDiffLayer::configure(const IPortableTensor *input1, const IPortableTensor *input2,
{
StatelessRandomUniformLayer::StatelessRandomUniformLayer()
- : _shape(nullptr), _seed(nullptr), _output(nullptr)
+ : _shape(nullptr), _seed(nullptr), _output(nullptr)
{
// DO NOTHING
}
void StatelessRandomUniformLayer::StatelessRandomUniformFloat32()
{
- nnfw::cker::StatelessRandomUniform(
- getTensorShape(_shape), reinterpret_cast<const int *>(_shape->buffer()),
- getTensorShape(_seed), reinterpret_cast<const int *>(_seed->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::StatelessRandomUniform(getShape(_shape), getBuffer<int32_t>(_shape), getShape(_seed),
+ getBuffer<int32_t>(_seed), getShape(_output),
+ getBuffer<float>(_output));
}
void StatelessRandomUniformLayer::run()
{
StridedSliceLayer::StridedSliceLayer()
- : _input(nullptr), _begin(nullptr), _end(nullptr), _strides(nullptr), _output(nullptr),
- _begin_mask(0), _ellipsis_mask(0), _end_mask(0), _new_axis_mask(0), _shrink_axis_mask(0)
+ : _input(nullptr), _begin(nullptr), _end(nullptr), _strides(nullptr), _output(nullptr),
+ _begin_mask(0), _ellipsis_mask(0), _end_mask(0), _new_axis_mask(0), _shrink_axis_mask(0)
{
}
template <typename T> void StridedSliceLayer::stridedSliceImpl()
{
- const auto input_shape = getTensorShape(_input);
- const auto output_shape = getTensorShape(_output);
+ const auto input_shape = getShape(_input);
+ const auto output_shape = getShape(_output);
auto op_params = nnfw::cker::buildStridedSliceParams(
- reinterpret_cast<uint32_t *>(_begin->buffer()), reinterpret_cast<uint32_t *>(_end->buffer()),
- reinterpret_cast<uint32_t *>(_strides->buffer()), _begin_mask, _end_mask, _shrink_axis_mask,
- input_shape.DimensionsCount());
+ getBuffer<uint32_t>(_begin), getBuffer<uint32_t>(_end), getBuffer<uint32_t>(_strides),
+ _begin_mask, _end_mask, _shrink_axis_mask, input_shape.DimensionsCount());
nnfw::cker::checkOutputSize(op_params, input_shape, output_shape, input_shape.DimensionsCount());
- nnfw::cker::StridedSlice(op_params, input_shape, reinterpret_cast<const T *>(_input->buffer()),
- output_shape, reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::StridedSlice(op_params, input_shape, getBuffer<T>(_input), output_shape,
+ getBuffer<T>(_output));
}
void StridedSliceLayer::configure(const IPortableTensor *input, const IPortableTensor *begin,
void TileLayer::tileFloat32()
{
- TileOneDimension(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<const int *>(_multipliers->buffer()),
- reinterpret_cast<float *>(_output->buffer()), 0);
+ TileOneDimension(getShape(_input), getBuffer<float>(_input), getBuffer<int>(_multipliers),
+ getBuffer<float>(_output), 0);
}
void TileLayer::tileQuant8()
template <typename T> void TransposeLayer::transpose()
{
nnfw::cker::TransposeParams param;
- assert(_perm->num_dimensions() == 1);
+ auto perm_shape = _perm->getShape();
+ assert(perm_shape.rank() == 1);
- param.perm_count = _input->num_dimensions();
- if (_perm->dimension(0) == 0) // This means _perm is (n-1...0)
+ param.perm_count = _input->getShape().rank();
+ if (perm_shape.dim(0) == 0) // This means _perm is (n-1...0)
{
const auto begin = param.perm;
- const auto end = param.perm + _input->num_dimensions();
+ const auto end = param.perm + _input->getShape().rank();
std::iota(begin, end, 0);
std::reverse(begin, end);
}
else
{
- assert(param.perm_count == static_cast<int>(_perm->dimension(0)));
+ assert(param.perm_count == static_cast<int>(perm_shape.dim(0)));
for (auto i = 0; i < param.perm_count; i++)
{
- param.perm[i] = *(reinterpret_cast<const int32_t *>(_perm->buffer()) + i);
+ param.perm[i] = *(getBuffer<int32_t>(_perm) + i);
}
}
- nnfw::cker::Transpose(param, getTensorShape(_input),
- reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::Transpose(param, getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output));
}
void TransposeLayer::transposeQuant8()
{
- if (_input->data_offset() != _output->data_offset())
+ if (_input->data_zero_point() != _output->data_zero_point())
{
throw std::runtime_error("TransposeLayer : qassym8 input and output offsets unmatched");
}
for (int32_t i = 0; i < _num_output; i++)
{
- outputDims.push_back(getTensorShape(_outputs[i]));
+ outputDims.push_back(getShape(_outputs[i]));
outputDimsPtr.push_back(&outputDims[i]);
}
for (const auto output : _outputs)
{
- outputPtrs.emplace_back(reinterpret_cast<T *>(output->buffer()));
+ outputPtrs.emplace_back(getBuffer<T>(output));
}
- nnfw::cker::Unpack<T>(op_params, getTensorShape(_input), reinterpret_cast<T *>(_input->buffer()),
- getTensorShape(_outputs[0]), outputPtrs.data());
+ nnfw::cker::Unpack<T>(op_params, getShape(_input), getBuffer<T>(_input), getShape(_outputs[0]),
+ outputPtrs.data());
}
void UnpackLayer::configure(const IPortableTensor *input, uint32_t axis, int32_t num,
#include "BackendContext.h"
#include "Config.h"
-#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include <backend/Backend.h>
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<onert::backend::BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
- bool) const override
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
{
- const auto &operands = graph.operands();
- const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
- auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto custom_kernel_builder = data.custom_kernel_builder;
+ auto &graph = *data.graph;
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
+ auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
context->external_context());
return context;
}
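The newContext(ContextData &&data) override above reads data.custom_kernel_builder and data.graph before handing the whole ContextData off with std::move, because the BackendContext constructor takes ownership of it. A minimal sketch of that ordering, using hypothetical stand-in types (DataSketch, ContextSketch) rather than the real onert classes:

#include <memory>
#include <utility>

struct DataSketch
{
  const int *graph;                    // stands in for the graph pointer
  std::shared_ptr<int> kernel_builder; // stands in for the custom kernel builder
};

struct ContextSketch
{
  explicit ContextSketch(DataSketch &&d) : data(std::move(d)) {}
  DataSketch data;
};

std::unique_ptr<ContextSketch> newContextSketch(DataSketch &&data)
{
  // Copy out what is still needed *before* the move; afterwards `data` is consumed.
  auto kernel_builder = data.kernel_builder;
  const auto *graph = data.graph;
  auto ctx = std::make_unique<ContextSketch>(std::move(data));
  // ... the tensor registry/builder and kernel generator would be built from
  // kernel_builder and graph here, then attached to ctx.
  (void)kernel_builder;
  (void)graph;
  return ctx;
}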
#include "ir/Index.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandIndexSequence.h"
-#include "backend/cpu_common/BackendContextHelpers.h"
+#include "backend/basic/BackendContextHelpers.h"
namespace onert
{
namespace ruy
{
-void BackendContext::initConsts()
-{
- for (auto &op : operation_list())
- {
- constant_initializer->setLayout(op.layout);
- graph()->operations().at(op.index).accept(*constant_initializer);
- }
-
- for (auto ind : operand_list())
- {
- const auto &obj = graph()->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- }
-
- constant_initializer->run();
-}
-
-ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info)
-{
- auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
- ir::Remove::DUPLICATED;
- for (auto index : operand_list())
- {
- if (model_io.contains(index))
- continue;
- const auto &obj = graph()->operands().at(index);
- const auto frontend_layout = [&]() {
- if (obj.getUses().size() == 0)
- return ir::Layout::UNKNOWN;
- auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
- for (auto &operation_info : operation_list())
- {
- if (operation_info.index == use_op_ind)
- return operation_info.layout;
- }
- return ir::Layout::UNKNOWN;
- }();
- const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
- if (permute_factor.backend() != backend())
- continue;
- const auto backend_layout = permute_factor.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
-
- // TODO Get compiler options from compiler, and use it rather than getting it from Env
- if (util::getConfigString(util::config::EXECUTOR) == "Linear")
- {
- cpu_common::planTensors(*this, order, op_seqs, lower_info);
- }
- else
- {
- // For the executors that does not have fixed linear execution order:
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto ind : operand_list())
- {
- if (tensor_builder->isRegistered(ind))
- tensor_builder->notifyFirstUse(ind);
- }
- }
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
- tensor_builder->prepare();
-
- return tensor_registry.get();
-}
-
-FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs)
+FunctionMap BackendContext::genKernels()
{
FunctionMap ret;
- for (auto op_seq_ind : order)
+ for (auto op_ind : _data.op_order)
{
- const auto &op_seq = op_seqs.at(op_seq_ind);
- bool assigned = [&]() {
- for (auto op_info : operation_list())
- if (op_seq.exist(op_info.index))
- return true;
- return false;
- }();
- if (!assigned)
- continue;
- auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
- ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
}
- initConsts();
+ basic::initConsts(*this);
// NOTE For memory optimization, we want to free some operand data
- for (auto ind : operand_list())
- {
- // TODO Remove const_cast
- auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
- obj.releaseData();
- }
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
for (auto &it : ret)
{
#include <backend/BackendContext.h>
#include "TensorBuilder.h"
-#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include "ExternalContext.h"
class BackendContext : public onert::backend::BackendContext
{
public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
+ BackendContext(const Backend *backend, ContextData &&data,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}, _external_context(new ExternalContext)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, _external_context(new ExternalContext)
{
}
- ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info) override;
+ ITensorRegistry *genTensors() override;
- FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs) override;
+ FunctionMap genKernels() override;
std::shared_ptr<ExternalContext> external_context() { return _external_context; }
private:
- void initConsts();
- void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+ void planTensors(const std::vector<onert::ir::OperationIndex> &order,
+ const compiler::GraphLowerInfo &lower_info);
public:
// TODO Make it private
std::shared_ptr<TensorBuilder> tensor_builder;
- std::shared_ptr<ConstantInitializer> constant_initializer;
std::shared_ptr<KernelGenerator> kernel_gen;
private:
ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
-} // namespace cpu
+} // namespace ruy
} // namespace backend
} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
-#define __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
-
-#include <backend/cpu_common/ConstantInitializer.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace ruy
-{
-
-using ConstantInitializer = cpu_common::ConstantInitializer;
-
-} // namespace ruy
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
#include <util/ConfigSource.h>
#include <ruy/context.h>
-namespace
-{
-const int kDefaultNumThreadpoolThreads = 4;
-}
-
namespace onert
{
namespace backend
class ExternalContext
{
+private:
+ static const int kDefaultNumThreadpoolThreads = 4;
+
public:
ExternalContext() : _ruy_context(new ::ruy::Context)
{
void setMaxNumThreads(int max_num_threads)
{
const int target_num_threads =
- max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
_ruy_context->set_max_num_threads(target_num_threads);
}
namespace ruy
{
-KernelGenerator::KernelGenerator(
- const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
- const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
- const std::shared_ptr<ExternalContext> &external_context)
- : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
- _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
- _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- // DO NOTHING
-}
+ auto ret = std::make_unique<exec::FunctionSequence>();
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
-{
- assert(!_return_fn_seq);
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_seq = &op_seq;
+ dyn_ctx->op_ind = ind;
dyn_ctx->operations = &_operations_ctx;
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
- _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+ ret->dynamic_tensor_ctx(dyn_ctx);
}
- _current_layout = op_seq.getLayout();
- for (const auto &operation_idx : op_seq.operations())
+ auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ assert(_return_fn); // _return_fn must have been generated
+ ret->append(std::move(_return_fn));
+
+ for (auto ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
{
- const auto &node = _operations_ctx.at(operation_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
- for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
{
- auto portable_tensor = _tensor_reg->getPortableTensor(ind);
- if (portable_tensor)
- {
- assert(portable_tensor->layout() == ir::Layout::NHWC);
- }
-
- auto tensor = _tensor_reg->getNativeTensor(ind);
- if (tensor)
- {
- tensor->increase_ref();
- }
+ tensor->increase_ref();
}
}
+ return ret;
+}
+
+KernelGenerator::KernelGenerator(
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : basic::KernelGeneratorBase{graph},
+ _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _external_context(external_context)
+{
+ // DO NOTHING
}
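With this change KernelGenerator::generate(ind) returns one exec::FunctionSequence per operation, and BackendContext::genKernels() simply walks _data.op_order. A simplified sketch of that contract, using hypothetical stand-in types (FnSeqSketch, genKernelsSketch); the real FunctionSequence additionally carries the dynamic-tensor context set up above:

#include <cstddef>
#include <functional>
#include <memory>
#include <utility>
#include <vector>

using OpIndexSketch = std::size_t;

struct FnSeqSketch
{
  std::vector<std::function<void()>> fns;
  void append(std::function<void()> f) { fns.push_back(std::move(f)); }
  void run()
  {
    for (auto &f : fns)
      f();
  }
};

// genKernels() walks the lowered execution order and asks the kernel generator
// for exactly one function sequence per operation index.
std::vector<std::pair<OpIndexSketch, std::unique_ptr<FnSeqSketch>>>
genKernelsSketch(const std::vector<OpIndexSketch> &op_order,
                 const std::function<std::unique_ptr<FnSeqSketch>(OpIndexSketch)> &generate)
{
  std::vector<std::pair<OpIndexSketch, std::unique_ptr<FnSeqSketch>>> ret;
  for (auto ind : op_order)
    ret.emplace_back(ind, generate(ind));
  return ret;
}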
void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_width = ker_shape.dim(2);
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
- dilation.width_factor, dilation.height_factor);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
#include "ExternalContext.h"
#include "TensorBuilder.h"
-#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/basic/TensorRegistry.h"
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
-#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <backend/basic/KernelGeneratorBase.h>
#include <ir/Operands.h>
#include <ir/Operations.h>
namespace ruy
{
-class KernelGenerator : public cpu_common::KernelGeneratorBase
+class KernelGenerator : public basic::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
- void visit(const ir::OpSequence &) override;
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+
+private:
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::FullyConnected &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
+ const ir::Layout _current_layout;
std::shared_ptr<TensorBuilder> _tensor_builder;
- std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::shared_ptr<basic::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
- ir::Layout _current_layout;
const std::shared_ptr<ExternalContext> _external_context;
};
#ifndef __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
-#include "backend/cpu_common/StaticTensorManager.h"
+#include "backend/basic/StaticTensorManager.h"
namespace onert
{
namespace ruy
{
-using StaticTensorManager = cpu_common::StaticTensorManager;
+using StaticTensorManager = basic::StaticTensorManager;
} // namespace ruy
} // namespace backend
#ifndef __ONERT_BACKEND_RUY_TENSOR_H__
#define __ONERT_BACKEND_RUY_TENSOR_H__
-#include <backend/cpu_common/Tensor.h>
+#include <backend/basic/Tensor.h>
#include <ir/Data.h>
namespace onert
namespace ruy
{
-using Tensor = cpu_common::Tensor;
-using ExternalTensor = cpu_common::ExternalTensor;
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
} // namespace ruy
} // namespace backend
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorBuilder.h"
-
-#include <util/logging.h>
-
-#include <cassert>
-
-namespace onert
-{
-namespace backend
-{
-namespace ruy
-{
-
-TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
- : _tensor_reg{tensor_reg},
- _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
- _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
-{
- /* empty */
-}
-
-void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout layout)
-{
- _tensor_info_map.emplace(ind, info);
-
- // CPU backend supports only one layout as NHWC
- assert(layout == ir::Layout::NHWC);
- if (info.isDynamic())
- {
- _dynamic_tensor_mgr->buildTensor(ind, info, layout);
- }
- else
- {
- _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
- }
-}
-
-void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
-{
- assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
- const auto tensor_info = _tensor_info_map.at(ind);
-
- if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
- {
- const auto size = tensor_info.total_size();
- _static_tensor_mgr->claimPlan(ind, size);
- }
-}
-
-void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
-{
- if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
- {
- _static_tensor_mgr->releasePlan(ind);
- }
-}
-
-bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
-{
- return _tensor_info_map.find(ind) != _tensor_info_map.end();
-}
-
-void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
-
-void TensorBuilder::allocate()
-{
- // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
- // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
-}
-
-} // namespace ruy
-} // namespace backend
-} // namespace onert
#ifndef __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
#define __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
-#include <backend/cpu_common/DynamicTensorManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
-
-#include <ir/OperandIndexMap.h>
-
-#include "StaticTensorManager.h"
-#include "Tensor.h"
-
-#include <unordered_map>
+#include <backend/basic/TensorBuilder.h>
namespace onert
{
namespace ruy
{
-class TensorBuilder
-{
-public:
- TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
-
- /**
- * @brief Register tensor information to allocate on CPU backend
- * @param[in] ind Operand index
- * @param[in] info Operand information
- * @param[in] layout Operand data layout
- */
- void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout);
-
- void notifyFirstUse(const ir::OperandIndex &);
- void notifyLastUse(const ir::OperandIndex &);
-
- bool isRegistered(const ir::OperandIndex &) const;
-
- void prepare(void);
- void allocate();
- void postFunctionPrepare() { /* DO NOTHING */}
-
- IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
-
-private:
- const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
- std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
- std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
- ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
-};
+using TensorBuilder = basic::TensorBuilder;
} // namespace ruy
} // namespace backend
namespace ops
{
ConvolutionLayer::ConvolutionLayer()
- : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
- _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
- _dilationHeightFactor(1), _activation(ir::Activation::NONE),
- _conv_kernel(new nnfw::ruy::Conv()), _prepare(false)
+ : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+ _dilationHeightFactor(1), _activation(ir::Activation::NONE),
+ _conv_kernel(new nnfw::ruy::Conv()), _prepare(false)
{
// DO NOTHING
}
param_padding.param.bottom = _paddingBottom;
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
- _dilationWidthFactor, _dilationHeightFactor);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ _dilationWidthFactor, _dilationHeightFactor);
_paddingLeft = padding.left;
_paddingRight = padding.right;
{
FullyConnectedLayer::FullyConnectedLayer()
- : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
- _activation(ir::Activation::NONE), _external_context(nullptr)
+ : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE), _external_context(nullptr)
{
// DO NOTHING
}
op_params.rhs_cacheable = _input->is_constant();
nnfw::ruy::FullyConnected(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
- _external_context->ruy_context());
+ op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->ruy_context());
}
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
#include "BackendContext.h"
#include "Config.h"
-#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include <backend/Backend.h>
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<onert::backend::BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
- bool) const override
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
{
- const auto &operands = graph.operands();
- const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
- auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto custom_kernel_builder = data.custom_kernel_builder;
+ auto &graph = *data.graph;
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
+ auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
context->external_context());
return context;
}
#include "ir/Index.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandIndexSequence.h"
-#include "backend/cpu_common/BackendContextHelpers.h"
+#include "backend/basic/BackendContextHelpers.h"
namespace onert
{
namespace xnnpack
{
-void BackendContext::initConsts()
-{
- for (auto &op : operation_list())
- {
- constant_initializer->setLayout(op.layout);
- graph()->operations().at(op.index).accept(*constant_initializer);
- }
-
- for (auto ind : operand_list())
- {
- const auto &obj = graph()->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- }
-
- constant_initializer->run();
-}
-
-ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info)
-{
- auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
- ir::Remove::DUPLICATED;
- for (auto index : operand_list())
- {
- if (model_io.contains(index))
- continue;
- const auto &obj = graph()->operands().at(index);
- const auto frontend_layout = [&]() {
- if (obj.getUses().size() == 0)
- return ir::Layout::UNKNOWN;
- auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
- for (auto &operation_info : operation_list())
- {
- if (operation_info.index == use_op_ind)
- return operation_info.layout;
- }
- return ir::Layout::UNKNOWN;
- }();
- const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
- if (permute_factor.backend() != backend())
- continue;
- const auto backend_layout = permute_factor.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
-
- // TODO Get compiler options from compiler, and use it rather than getting it from Env
- if (util::getConfigString(util::config::EXECUTOR) == "Linear")
- {
- cpu_common::planTensors(*this, order, op_seqs, lower_info);
- }
- else
- {
- // For the executors that does not have fixed linear execution order:
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto ind : operand_list())
- {
- if (tensor_builder->isRegistered(ind))
- tensor_builder->notifyFirstUse(ind);
- }
- }
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
- tensor_builder->prepare();
-
- return tensor_registry.get();
-}
-
-FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs)
+FunctionMap BackendContext::genKernels()
{
FunctionMap ret;
- for (auto op_seq_ind : order)
+ for (auto op_ind : _data.op_order)
{
- const auto &op_seq = op_seqs.at(op_seq_ind);
- bool assigned = [&]() {
- for (auto op_info : operation_list())
- if (op_seq.exist(op_info.index))
- return true;
- return false;
- }();
- if (!assigned)
- continue;
- auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
- ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
}
- initConsts();
+ basic::initConsts(*this);
// NOTE For memory optimization, we want to free some operand data
- for (auto ind : operand_list())
- {
- // TODO Remove const_cast
- auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
- obj.releaseData();
- }
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
for (auto &it : ret)
{
#include <backend/BackendContext.h>
#include <util/ConfigSource.h>
#include "TensorBuilder.h"
-#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include "ExternalContext.h"
-namespace
-{
const int kDefaultNumThreadpoolThreads = 1;
-}
namespace onert
{
class BackendContext : public onert::backend::BackendContext
{
public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
+ BackendContext(const Backend *backend, ContextData &&data,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}, _external_context(nullptr)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, _external_context(nullptr)
{
int num_threads = util::getConfigInt(util::config::XNNPACK_THREADS);
if (num_threads < 1)
_external_context.reset(new ExternalContext(static_cast<size_t>(num_threads)));
}
- ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info) override;
-
- FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs) override;
+ ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
std::shared_ptr<ExternalContext> external_context() { return _external_context; }
-private:
- void initConsts();
- void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
-
public:
// TODO Make it private
std::shared_ptr<TensorBuilder> tensor_builder;
- std::shared_ptr<ConstantInitializer> constant_initializer;
std::shared_ptr<KernelGenerator> kernel_gen;
private:
ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
-} // namespace cpu
+} // namespace xnnpack
} // namespace backend
} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
-#define __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
-
-#include <backend/cpu_common/ConstantInitializer.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace xnnpack
-{
-
-using ConstantInitializer = cpu_common::ConstantInitializer;
-
-} // namespace xnnpack
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
{
ExternalContext::ExternalContext(size_t num_threads)
- : _threadpool(pthreadpool_create(num_threads), pthreadpool_destroy)
+ : _threadpool(pthreadpool_create(num_threads), pthreadpool_destroy)
{
assert(_threadpool);
}
{
KernelGenerator::KernelGenerator(
- const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
- const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
- const std::shared_ptr<ExternalContext> &external_context)
- : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
- _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
- _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : basic::KernelGeneratorBase{graph},
+ _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _external_context(external_context)
{
// DO NOTHING
}
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- assert(!_return_fn_seq);
+ auto ret = std::make_unique<exec::FunctionSequence>();
+
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_seq = &op_seq;
+ dyn_ctx->op_ind = ind;
dyn_ctx->operations = &_operations_ctx;
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
- _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+ ret->dynamic_tensor_ctx(dyn_ctx);
}
- _current_layout = op_seq.getLayout();
- for (const auto &operation_idx : op_seq.operations())
+ auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ assert(_return_fn); // _return_fn must have been generated
+ ret->append(std::move(_return_fn));
+
+ for (auto ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
{
- const auto &node = _operations_ctx.at(operation_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
- for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
{
- auto portable_tensor = _tensor_reg->getPortableTensor(ind);
- if (portable_tensor)
- {
- assert(portable_tensor->layout() == ir::Layout::NHWC);
- }
-
- auto tensor = _tensor_reg->getNativeTensor(ind);
- if (tensor)
- {
- tensor->increase_ref();
- }
+ tensor->increase_ref();
}
}
+ return ret;
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_width = ker_shape.dim(2);
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
- dilation.width_factor, dilation.height_factor);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
#include "ExternalContext.h"
#include "TensorBuilder.h"
-#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/basic/TensorRegistry.h"
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
-#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <backend/basic/KernelGeneratorBase.h>
#include <ir/Operands.h>
#include <ir/Operations.h>
namespace xnnpack
{
-class KernelGenerator : public cpu_common::KernelGeneratorBase
+class KernelGenerator : public basic::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
- void visit(const ir::OpSequence &) override;
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+
+private:
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
void visit(const ir::operation::FullyConnected &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
+ ir::Layout _current_layout;
std::shared_ptr<TensorBuilder> _tensor_builder;
- std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::shared_ptr<basic::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
- ir::Layout _current_layout;
const std::shared_ptr<ExternalContext> _external_context;
};
#ifndef __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
-#include "backend/cpu_common/StaticTensorManager.h"
+#include "backend/basic/StaticTensorManager.h"
namespace onert
{
namespace xnnpack
{
-using StaticTensorManager = cpu_common::StaticTensorManager;
+using StaticTensorManager = basic::StaticTensorManager;
} // namespace xnnpack
} // namespace backend
#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_H__
#define __ONERT_BACKEND_XNNPACK_TENSOR_H__
-#include <backend/cpu_common/Tensor.h>
+#include <backend/basic/Tensor.h>
#include <ir/Data.h>
namespace onert
namespace xnnpack
{
-using Tensor = cpu_common::Tensor;
-using ExternalTensor = cpu_common::ExternalTensor;
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
} // namespace xnnpack
} // namespace backend
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorBuilder.h"
-
-#include <util/logging.h>
-
-#include <cassert>
-
-namespace onert
-{
-namespace backend
-{
-namespace xnnpack
-{
-
-TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
- : _tensor_reg{tensor_reg},
- _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
- _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
-{
- /* empty */
-}
-
-void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout layout)
-{
- _tensor_info_map.emplace(ind, info);
-
- // XNNPACK backend supports only one layout as NHWC
- assert(layout == ir::Layout::NHWC);
- if (info.isDynamic())
- {
- _dynamic_tensor_mgr->buildTensor(ind, info, layout);
- }
- else
- {
- _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
- }
-}
-
-void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
-{
- assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
- const auto tensor_info = _tensor_info_map.at(ind);
-
- if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
- {
- const auto size = tensor_info.total_size();
- _static_tensor_mgr->claimPlan(ind, size);
- }
-}
-
-void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
-{
- if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
- {
- _static_tensor_mgr->releasePlan(ind);
- }
-}
-
-bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
-{
- return _tensor_info_map.find(ind) != _tensor_info_map.end();
-}
-
-void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
-
-void TensorBuilder::allocate()
-{
- // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
- // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
-}
-
-} // namespace xnnpack
-} // namespace backend
-} // namespace onert
#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
#define __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
-#include <backend/cpu_common/DynamicTensorManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
-
-#include <ir/OperandIndexMap.h>
-
-#include "StaticTensorManager.h"
-#include "Tensor.h"
-
-#include <unordered_map>
+#include <backend/basic/TensorBuilder.h>
namespace onert
{
namespace xnnpack
{
-class TensorBuilder
-{
-public:
- TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
-
- /**
- * @brief Register tensor information to allocate on XNNPACK backend
- * @param[in] ind Operand index
- * @param[in] info Operand information
- * @param[in] layout Operand data layout
- */
- void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout);
-
- void notifyFirstUse(const ir::OperandIndex &);
- void notifyLastUse(const ir::OperandIndex &);
-
- bool isRegistered(const ir::OperandIndex &) const;
-
- void prepare(void);
- void allocate();
- void postFunctionPrepare() { /* DO NOTHING */}
-
- IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
-
-private:
- const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
- std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
- std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
- ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
-};
+using TensorBuilder = basic::TensorBuilder;
} // namespace xnnpack
} // namespace backend
namespace ops
{
ConvolutionLayer::ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context)
- : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
- _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
- _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0),
- _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0), _padding_right(0),
+ _padding_bottom(0), _stride_width(0), _stride_height(0), _dilation_width_factor(1),
+ _dilation_height_factor(1), _activation(ir::Activation::NONE)
{
// DO NOTHING
}
assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
- _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
- _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
- 1 /* groups */, input_channels /* group_input_channels */,
- output_channels /* group_output_channels */, input_channels /* input_channel_stride */,
- output_channels /* output_channel_stride */,
- reinterpret_cast<const float *>(_kernel->buffer()),
- reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
- output_activation_max, 0, &_kernel_op);
+ _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+ _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor, 1 /* groups */,
+ input_channels /* group_input_channels */, output_channels /* group_output_channels */,
+ input_channels /* input_channel_stride */, output_channels /* output_channel_stride */,
+ reinterpret_cast<const float *>(_kernel->buffer()),
+ reinterpret_cast<const float *>(_bias->buffer()), output_activation_min, output_activation_max,
+ 0, &_kernel_op);
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 Convolution operator"};
uint32_t input_height = _input->getShape().dim(1);
uint32_t batch_size = _input->getShape().dim(0);
enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
- _kernel_op, batch_size, input_height, input_width,
- reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ _kernel_op, batch_size, input_height, input_width,
+ reinterpret_cast<const float *>(_input->buffer()), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->getThreadPool());
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 Convolution operator"};
{
DepthwiseConvolutionLayer::DepthwiseConvolutionLayer(
- const std::shared_ptr<ExternalContext> external_context)
- : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
- _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
- _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0), _multiplier(1),
- _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+ const std::shared_ptr<ExternalContext> external_context)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0), _padding_right(0),
+ _padding_bottom(0), _stride_width(0), _stride_height(0), _multiplier(1),
+ _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
{
// DO NOTHING
}
void DepthwiseConvolutionLayer::configure(
- const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
- ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right,
- const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width,
- const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor,
- const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output)
+ const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
+ ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width,
+ const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor,
+ const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output)
{
_input = input;
_kernel = kernel;
assert(output_channels == input_channels * _multiplier);
enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
- _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
- _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
- input_channels /* groups */, 1 /* group_input_channels */,
- _multiplier /* group_output_channels */, input_channels /* input_channel_stride */,
- output_channels /* output_channel_stride */,
- reinterpret_cast<const float *>(_kernel->buffer()),
- reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
- output_activation_max, XNN_FLAG_DEPTHWISE_CONVOLUTION, &_kernel_op);
+ _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+ _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+ input_channels /* groups */, 1 /* group_input_channels */,
+ _multiplier /* group_output_channels */, input_channels /* input_channel_stride */,
+ output_channels /* output_channel_stride */, reinterpret_cast<const float *>(_kernel->buffer()),
+ reinterpret_cast<const float *>(_bias->buffer()), output_activation_min, output_activation_max,
+ XNN_FLAG_DEPTHWISE_CONVOLUTION, &_kernel_op);
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
uint32_t input_height = _input->getShape().dim(1);
uint32_t batch_size = _input->getShape().dim(0);
enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
- _kernel_op, batch_size, input_height, input_width,
- reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ _kernel_op, batch_size, input_height, input_width,
+ reinterpret_cast<const float *>(_input->buffer()), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->getThreadPool());
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
{
FullyConnectedLayer::FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context)
- : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
- _activation(ir::Activation::NONE)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE)
{
// DO NOTHING
}
const float *bias_buffer = (_bias) ? reinterpret_cast<const float *>(_bias->buffer()) : nullptr;
enum xnn_status status = xnn_create_fully_connected_nc_f32(
- input_channels, output_channels, input_channels /* input stride */,
- output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
- output_activation_max, flag, &_kernel_op);
+ input_channels, output_channels, input_channels /* input stride */,
+ output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
+ output_activation_max, flag, &_kernel_op);
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
uint32_t batch_size = _input->getShape().num_elements() / _kernel->getShape().dim(1);
enum xnn_status status = xnn_setup_fully_connected_nc_f32(
- _kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ _kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
{
public:
Layer(const std::shared_ptr<ExternalContext> external_context)
- : _kernel_op{nullptr}, _create{false}, _setup{false}, _external_context{external_context}
+ : _kernel_op{nullptr}, _create{false}, _setup{false}, _external_context{external_context}
{
// DO NOTHING
}
endif(NOT ENABLE_TEST)
# Unit Tests
-set(TEST_ONERT_BACKEND_CPU_COMMON test_onert_backend_cpu_common)
+set(TEST_ONERT_CORE test_onert_core)
-add_executable(${TEST_ONERT_BACKEND_CPU_COMMON} ${TESTS})
+add_executable(${TEST_ONERT_CORE} ${TESTS})
-target_link_libraries(${TEST_ONERT_BACKEND_CPU_COMMON} onert_core)
-target_link_libraries(${TEST_ONERT_BACKEND_CPU_COMMON} gtest gtest_main dl ${LIB_PTHREAD})
+target_link_libraries(${TEST_ONERT_CORE} onert_core)
+target_link_libraries(${TEST_ONERT_CORE} gtest gtest_main dl ${LIB_PTHREAD})
-add_test(${TEST_ONERT_BACKEND_CPU_COMMON} ${TEST_ONERT_BACKEND_CPU_COMMON})
-install(TARGETS ${TEST_ONERT_BACKEND_CPU_COMMON} DESTINATION unittest_standalone)
+add_test(${TEST_ONERT_CORE} ${TEST_ONERT_CORE})
+install(TARGETS ${TEST_ONERT_CORE} DESTINATION unittest_standalone)
virtual ~Backend() = default;
virtual std::shared_ptr<onert::backend::IConfig> config() const = 0;
- virtual std::unique_ptr<BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<backend::custom::IKernelBuilder> &kb,
- bool is_linear_executor) const = 0;
+ virtual std::unique_ptr<BackendContext> newContext(ContextData &&) const = 0;
};
} // namespace backend
#include <memory>
#include "ir/Graph.h"
-#include "ir/LowerInfoMap.h"
+#include "ir/OperationIndexMap.h"
+#include "ir/OperandIndexMap.h"
+#include "compiler/GraphLowerInfo.h"
#include "exec/FunctionSequence.h"
namespace onert
struct ITensorRegistry;
using FunctionMap =
- std::vector<std::pair<ir::OpSequenceIndex, std::unique_ptr<exec::FunctionSequence>>>;
+ std::vector<std::pair<ir::OperationIndex, std::unique_ptr<exec::FunctionSequence>>>;
-class BackendContext
+struct ContextData
{
-public:
- struct OperationInfo
- {
- ir::OperationIndex index;
- ir::Layout layout;
-
- OperationInfo(ir::OperationIndex index, ir::Layout layout) : index{index}, layout{layout} {}
- };
+ /* A partial graph that only includes used operands/operations of the original graph */
+ std::unique_ptr<ir::Graph> graph;
+ /* A linear order of operations. This is necessary for when a graph is not fully connected */
+ std::vector<onert::ir::OperationIndex> op_order;
+ /* Operands that are defined by other backends */
+ util::Set<ir::OperandIndex> external_operands;
+ /* Operand layout info */
+ ir::OperandIndexMap<ir::Layout> operand_layouts;
+ /* Custom kernel builder */
+ std::shared_ptr<custom::IKernelBuilder> custom_kernel_builder;
+ /* Is linear executor or not */
+ bool is_linear_executor;
+};
+class BackendContext
+{
public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
+ BackendContext(const Backend *backend, ContextData &&data,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr)
- : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry}
+ : _backend{backend}, _data{std::move(data)}, tensor_registry{tensor_registry}
{
}
virtual ~BackendContext() = default;
- void initialize(const std::vector<OperationInfo> &operation_list,
- const std::vector<ir::OperandIndex> &operand_list);
- void initConsts();
-
const Backend *backend() const { return _backend; }
- const ir::Graph *graph() const { return _graph; }
- const std::vector<OperationInfo> &operation_list() const { return _operation_list; }
- const std::vector<ir::OperandIndex> &operand_list() const { return _operand_list; }
+ const ir::Graph *graph() const { return _data.graph.get(); }
+ const util::Set<ir::OperandIndex> &external_operands() const { return _data.external_operands; }
+ const ir::OperandIndexMap<ir::Layout> &operand_layouts() const { return _data.operand_layouts; }
+ const ContextData &data() const { return _data; }
- virtual ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &,
- const ir::OpSequences &, const ir::LowerInfoMap &)
- {
- return nullptr;
- }
- virtual FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &,
- const ir::OpSequences &)
- {
- return {};
- }
+ virtual ITensorRegistry *genTensors() = 0;
+ virtual FunctionMap genKernels() = 0;
-private:
+protected:
const Backend *_backend{nullptr};
- const ir::Graph *_graph{nullptr};
- std::vector<OperationInfo> _operation_list;
- std::vector<ir::OperandIndex> _operand_list;
+ ContextData _data;
public:
std::shared_ptr<ITensorRegistry> tensor_registry;
*/
virtual ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) = 0;
/**
- * @brief The function that is called after each OpSequence run on profiling mode.
+ * @brief The function that is called after each Operation run in profiling mode.
* This may be useful for profiling GPU-based or special computing units.
*/
virtual void sync() const {}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_IDYNAMICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_IDYNAMICTENSOR_MANAGER_H__
-
-#include "ITensorManager.h"
-
-#include <ir/Index.h>
-#include <ir/Operation.h>
-#include <ir/Shape.h>
-#include <backend/ITensor.h>
-
-namespace onert
-{
-namespace backend
-{
-
-/**
- * @brief Interface as an abstract tensor manager, providing ways to handle memory
- * for dynamic tensors.
- */
-struct IDynamicTensorManager : public ITensorManager
-{
- virtual ~IDynamicTensorManager() = default;
-
-public:
- /**
- * @brief Plan when to delete a tensor. Note this planning is done at compilation time.
- * @param op_ind operation index
- * @param tensor candidate ITensor to dealloc. Tensor can be static
- * or dynamic since tensor type may not be clearly known at compilation time.
- */
- virtual void planDealloc(ir::OperationIndex op_ind, backend::ITensor *tensor) = 0;
-
- /**
- * @brief Deallocate input tensors of op if an input tensor is a dynamic tensor and it won't
- * be used anymore
- * @note This will work after calling planDealloc
- */
- virtual void deallocInput(ir::OperationIndex op_ind) = 0;
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_IDYNAMICTENSOR_MANAGER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_IMEMORY_MANAGER_H__
-#define __ONERT_BACKEND_IMEMORY_MANAGER_H__
-
-namespace onert
-{
-namespace backend
-{
-
-struct IMemoryManager
-{
- virtual ~IMemoryManager() = default;
-
- virtual void allocate(void) = 0;
- virtual void deallocate(void) = 0;
-};
-
-} // namespace backend
-} // namespace onert
-
-#include <unordered_set>
-#include <memory>
-
-namespace onert
-{
-namespace backend
-{
-
-using MemoryManagerSet = std::unordered_set<std::unique_ptr<backend::IMemoryManager>>;
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_IMEMORY_MANAGER_H__
virtual ~IPortableTensor();
virtual const ir::Sparsity *sparsity() const { return nullptr; }
const ir::OperandInfo &get_info() const { return _info; }
+ float data_scale() const override { return _info.typeInfo().scale(); }
+ int32_t data_zero_point() const override { return _info.typeInfo().zero_point(); }
+ const std::vector<float> &data_scales() const override { return _info.typeInfo().scales(); }
+ const std::vector<int32_t> &data_zero_points() const override
+ {
+ return _info.typeInfo().zero_points();
+ }
public:
bool has_padding() const final { return false; }
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ISTATICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_ISTATICTENSOR_MANAGER_H__
-
-#include "ITensorManager.h"
-
-namespace onert
-{
-namespace backend
-{
-
-struct IStaticTensorManager : public ITensorManager
-{
- virtual ~IStaticTensorManager() = default;
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ISTATICTENSOR_MANAGER_H__
namespace backend
{
-struct IDynamicTensorManager;
-
class ITensor
{
public:
- virtual ~ITensor() = default;
+ virtual ~ITensor();
public:
virtual uint8_t *buffer() const = 0;
virtual size_t total_size() const = 0;
- virtual size_t dimension(size_t index) const = 0;
- virtual size_t num_dimensions() const = 0;
virtual size_t calcOffset(const ir::Coordinates &coords) const = 0;
virtual ir::Layout layout() const = 0;
virtual ir::DataType data_type() const = 0;
virtual float data_scale() const = 0;
- virtual int32_t data_offset() const = 0;
+ virtual int32_t data_zero_point() const = 0;
+ virtual const std::vector<float> &data_scales() const = 0;
+ virtual const std::vector<int32_t> &data_zero_points() const = 0;
virtual bool has_padding() const = 0;
virtual void access(const std::function<void(ITensor &tensor)> &fn) = 0;
throw std::runtime_error("This backend does not support dynamic tensor");
}
+ /// @brief Dealloc the buffer (only for dynamic tensors)
+ virtual void deallocBuffer()
+ {
+ throw std::runtime_error("This backend does not support resetting buffer");
+ }
+
/**
* @brief Set the shape of the tensor to new_shape
* @note Higher dimensions are placed at the front.
* @brief Get ir::Shape of tensor
* @note Higher dimensions are placed at the front.
*/
- virtual ir::Shape getShape() const;
+ virtual ir::Shape getShape() const = 0;
virtual bool is_subtensor() const { return false; }
virtual bool needMemoryMap() const { return false; }
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ITENSOR_MANAGER_H__
-#define __ONERT_BACKEND_ITENSOR_MANAGER_H__
-
-namespace onert
-{
-namespace backend
-{
-
-// NOTE This name ITensorManager has been discussed whether or not the name is proper.
-// Anyone can argue with any better name.
-/**
- * @brief Interface as an abstract tensor manager which has MemoryManager
- * This is used as a base class for IStaticTensorManager and IDynamicTensorManager
- */
-struct ITensorManager
-{
- virtual ~ITensorManager() = default;
-};
-
-} // namespace backend
-} // namespace onert
-
-#include <unordered_set>
-#include <memory>
-
-namespace onert
-{
-namespace backend
-{
-
-using TensorManagerSet = std::unordered_set<std::unique_ptr<backend::ITensorManager>>;
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ITENSOR_MANAGER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file       Allocator.h
+ * @brief      This file contains Allocator related classes
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_ALLOCATOR_H__
+#define __ONERT_BACKEND_BASIC_ALLOCATOR_H__
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+/**
+ * @brief Class to allocate memory
+ */
+class Allocator
+{
+public:
+ Allocator(uint32_t capacity);
+ /**
+ * @brief Get memory base pointer
+ * @return base pointer
+ */
+ uint8_t *base() const { return _base.get(); }
+ void release() { _base.reset(); }
+
+private:
+ std::unique_ptr<uint8_t[]> _base;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_ALLOCATOR_H__
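For orientation, here is a minimal usage sketch of the new basic::Allocator. It assumes the constructor (defined in the accompanying .cc, not shown in this diff) allocates `capacity` bytes and that the header is installed as backend/basic/Allocator.h like the other public headers in this change.

#include <backend/basic/Allocator.h>

#include <cstring>

void allocator_sketch()
{
  onert::backend::basic::Allocator alloc{64}; // assumed to allocate 64 bytes on construction
  std::memset(alloc.base(), 0, 64);           // write through the raw base pointer
  alloc.release();                            // drop the allocation explicitly
}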
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_BACKEND_CONTEXT_HELPERS_H__
+#define __ONERT_BACKEND_BASIC_BACKEND_CONTEXT_HELPERS_H__
+
+#include <vector>
+
+#include "ir/Index.h"
+#include "compiler/GraphLowerInfo.h"
+#include "util/logging.h"
+#include "backend/ITensorRegistry.h"
+#include "backend/BackendContext.h"
+#include "Tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+// TODO Remove the template param BackendContext once unification of cpu backend context is done
+template <typename T_BackendContext> void planTensors(const T_BackendContext &ctx)
+{
+ const ir::Graph &graph = *ctx.graph();
+ const auto &order = ctx.data().op_order;
+ auto tensor_builder = ctx.tensor_builder;
+
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexMap<uint32_t> def_map;
+ ir::OperandIndexSequence constants;
+
+ auto model_io =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ // Prepare scanning
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (ctx.external_operands().contains(ind))
+ return;
+
+ // TODO Check if we need to handle unused tensors
+
+ uses_map[ind] = obj.getUses().size();
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+ if (obj.isConstant())
+ constants.append(ind);
+
+ if (!tensor_builder->isRegistered(ind))
+ {
+ // These tensors do not exist in any operation (No use and def)
+ const auto info = obj.info();
+ // NOTE Currently the basic backend only supports NHWC tensors.
+ // There is no way to get the layout info from the backend context for now.
+ // When we support NCHW tensors as well, we also need to change tensor info to be
+ // permuted shape.
+ assert(ctx.operand_layouts().at(ind) == ir::Layout::NHWC);
+ tensor_builder->registerTensorInfo(ind, info, ir::Layout::NHWC);
+ }
+ });
+
+ // Start scanning to do notify{First|Last}Use for each tensor
+
+ // If a tensor is a constant, increase the use of the tensor and allocate it first.
+ // Increasing the use count here means the tensor is never deallocated during execution,
+ // i.e. it will be deallocated last.
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+ for (auto &pair : def_map)
+ {
+ if (pair.second == 0)
+ tensor_builder->notifyFirstUse(pair.first);
+ }
+
+ // This is a workaround to keep these operands alive throughout the execution
+ // (they look unused because their use count is zero)
+ std::vector<ir::OperandIndex> operands_last_until_end;
+ for (auto &pair : uses_map)
+ {
+ if (pair.second == 0)
+ operands_last_until_end.push_back(pair.first);
+ }
+
+ // At each operation,
+ // 1. Scan DEF of outputs. If the output is defined here, allocate it
+ // 2. Scan DEF of inputs. If variable tensor, allocate it
+ // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+ for (const auto op_ind : order)
+ {
+ const auto &op = graph.operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (ctx.external_operands().contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
+ {
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ // Scan variable tensors
+ // These tensors behave like constants, but OperandInfo and LowerInfo treat them as
+ // non-constant so that memory planning here can reduce memory usage
+ for (const auto &ind : op_inputs)
+ {
+ if (ctx.external_operands().contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph.operands().at(ind);
+ if (operand.info().isVariable())
+ {
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ for (const auto &ind : op_inputs)
+ {
+ if (ctx.external_operands().contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
+ {
+ // plan for deallocation of static tensor
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+ }
+
+ for (auto ind : operands_last_until_end)
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+ assert(
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContext &ctx)
+{
+ const ir::Graph &graph = *ctx.graph();
+ auto tensor_builder = ctx.tensor_builder;
+
+ auto model_io =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (ctx.external_operands().contains(ind))
+ return;
+ // NOTE Assuming there are no layout changes (always assume NHWC or UNKNOWN)
+ assert(graph.layout() != ir::Layout::NCHW);
+ ir::OperandInfo backend_info{obj.shape(), obj.typeInfo(), obj.info().memAllocType(),
+ obj.isConstant()};
+ tensor_builder->registerTensorInfo(ind, backend_info, ir::Layout::NHWC);
+ });
+
+ // TODO Get compiler options from the compiler and use them rather than getting them from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ basic::planTensors(ctx);
+ }
+ else
+ {
+ // For executors that do not have a fixed linear execution order:
+ // as a workaround, use the static memory planner so that tensors are never deallocated
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ });
+ }
+
+ tensor_builder->allocate();
+
+ return ctx.tensor_registry.get();
+}
+
+inline void initConsts(BackendContext &ctx)
+{
+ ctx.graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (ctx.external_operands().contains(ind) || !operand.isConstant())
+ return;
+
+ auto tensor = ctx.tensor_registry->getNativeITensor(ind);
+ assert(tensor != nullptr);
+
+ VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;
+
+ auto data = operand.shareData();
+ assert(data && data->base());
+ ExternalTensor *ext_tensor = dynamic_cast<ExternalTensor *>(tensor);
+
+ if (ext_tensor == nullptr)
+ throw std::runtime_error{"This tensor is not external tensor"};
+
+ ext_tensor->setData(data);
+ });
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_BACKEND_CONTEXT_HELPERS_H__
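planTensors above is essentially a liveness pass: every operand starts with its remaining-use count, constants and zero-use operands are pinned until the end, and notifyFirstUse / notifyLastUse fire when an operand is defined and when its last use is consumed. The standalone sketch below (plain STL, no onert types) illustrates the counting scheme on a two-operation chain; the operand numbering is made up for illustration.

#include <cstdio>
#include <map>
#include <utility>
#include <vector>

// Toy version of the use/def counting: operand 0 -> op A -> operand 1 -> op B -> operand 2.
int main()
{
  std::map<int, int> uses{{0, 1}, {1, 1}, {2, 0}};            // remaining uses per operand
  const std::vector<std::pair<int, int>> ops{{0, 1}, {1, 2}}; // {input, output} per operation

  std::printf("notifyFirstUse(0)\n"); // operand 0 has no def, so it is claimed up front
  for (const auto &op : ops)
  {
    std::printf("notifyFirstUse(%d)\n", op.second); // output is defined by this operation
    if (--uses[op.first] == 0)
      std::printf("notifyLastUse(%d)\n", op.first); // input is dead after this operation
  }
  std::printf("notifyLastUse(2)\n"); // zero-use output: released only at the very end
  return 0;
}

Constants follow the same pattern but receive one artificial extra use, which is what the "deallocated last" comment above refers to.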
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_DYNAMICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_BASIC_DYNAMICTENSOR_MANAGER_H__
+
+#include "MemoryManager.h"
+#include "TensorRegistry.h"
+
+#include <ir/OperandInfo.h>
+#include <ir/Operation.h>
+#include <ir/Index.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+// TODO Find optimized algorithm to manage memory.
+
+/**
+ * @brief Class to manage dynamic tensor and its memory
+ */
+class DynamicTensorManager
+{
+public:
+ DynamicTensorManager(const std::shared_ptr<TensorRegistry> ®);
+
+ virtual ~DynamicTensorManager() = default;
+
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
+ ir::Layout backend_layout);
+
+ std::shared_ptr<DynamicMemoryManager> dynamic_mem_mgr() { return _dynamic_mem_mgr; }
+
+private:
+ const ITensor *getRawITensor(ir::OperandIndex ind);
+
+private:
+ /**
+ * @brief Memory manager for dynamic tensor.
+ * @todo DynamicMemoryManager is not optimized. Optimized one is needed
+ */
+ std::shared_ptr<DynamicMemoryManager> _dynamic_mem_mgr;
+ const std::shared_ptr<TensorRegistry> _tensors;
+
+ // Contains the indices of dynamic tensors that can be deallocated after running an operation.
+ // Note: this map could contain static tensor indices too, so careful use is required.
+ std::unordered_map<ir::OperationIndex, std::unordered_set<backend::ITensor *>>
+ _dealloc_tensor_map;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_DYNAMICTENSOR_MANAGER_H__
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_IMEMORY_PLANNER_H__
+#define __ONERT_BACKEND_IMEMORY_PLANNER_H__
+
+#include "ir/OperandIndexMap.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+/**
+ * @brief Structure to have memory offset and size
+ */
+struct Block
+{
+ uint32_t offset;
+ size_t size;
+};
+
+/**
+ * @brief Interface to plan memory
+ */
+struct IMemoryPlanner
+{
+ using MemoryPlans = ir::OperandIndexMap<Block>;
+
+ /**
+ * @brief Claim memory for operand
+ * @param[in] index The operand index
+ * @param[in] size The size of the memory
+ */
+ virtual void claim(const ir::OperandIndex &, size_t) = 0;
+ /**
+ * @brief Release memory for operand
+ * @param[in] index The operand index
+ */
+ virtual void release(const ir::OperandIndex &) = 0;
+ /**
+ * @brief Get capacity for memory planning
+ * @return The value of capacity
+ */
+ virtual uint32_t capacity() = 0;
+ /**
+ * @brief Get MemoryPlans
+ * @return MemoryPlans
+ */
+ virtual MemoryPlans &memory_plans() = 0;
+
+ virtual ~IMemoryPlanner() = default;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_IMEMORY_PLANNER_H__
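As a concrete (if wasteful) instance of this interface, a bump-style planner can satisfy claim/release/capacity by appending every claimed block and never reusing space. The sketch below is illustration only, not necessarily the planner MemoryManager actually instantiates; it assumes the onert ir headers pulled in by IMemoryPlanner.h are available.

#include "IMemoryPlanner.h"

namespace onert
{
namespace backend
{
namespace basic
{

// Illustration only: offsets grow monotonically and release() reclaims nothing.
class BumpPlannerSketch : public IMemoryPlanner
{
public:
  void claim(const ir::OperandIndex &ind, size_t size) override
  {
    _plans[ind] = Block{_capacity, size};
    _capacity += static_cast<uint32_t>(size);
  }
  void release(const ir::OperandIndex &) override { /* never reuses memory */ }
  uint32_t capacity() override { return _capacity; }
  MemoryPlans &memory_plans() override { return _plans; }

private:
  uint32_t _capacity = 0;
  MemoryPlans _plans;
};

} // namespace basic
} // namespace backend
} // namespace onert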
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_KERNEL_GENERATOR_BASE_H__
+#define __ONERT_BACKEND_BASIC_KERNEL_GENERATOR_BASE_H__
+
+#include <assert.h>
+#include <memory>
+#include <functional>
+
+#include "ir/Graph.h"
+#include "ir/OperationVisitor.h"
+#include "exec/FunctionSequence.h"
+#include "backend/ITensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class KernelGeneratorBase : public ir::OperationVisitor
+{
+public:
+ virtual ~KernelGeneratorBase() = default;
+ KernelGeneratorBase(const ir::Graph &graph) : _graph{graph} {}
+
+ virtual std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) = 0;
+
+protected:
+ using OperationVisitor::visit;
+
+#define OP(InternalName) \
+ void visit(const ir::operation::InternalName &) override \
+ { \
+ throw std::runtime_error("KernelGenerator: NYI for operation '" #InternalName "'"); \
+ }
+#include "ir/Operations.lst"
+#undef OP
+
+protected:
+ std::unique_ptr<exec::IFunction> releaseFunction()
+ {
+ assert(_return_fn);
+ return std::move(_return_fn);
+ }
+
+protected:
+ const ir::Graph &_graph;
+ std::unique_ptr<exec::IFunction> _return_fn;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_KERNEL_GENERATOR_BASE_H__
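The OP/visit machinery above gives every operation a throwing default, so a concrete backend only overrides the operations it supports and supplies generate(). A heavily trimmed sketch of what a derived generator could look like follows; DummyAddKernel and the BinaryArithmetic choice are hypothetical stand-ins, and real backends do more bookkeeping in generate() than shown here.

#include <backend/basic/KernelGeneratorBase.h>

namespace sketch
{

// Hypothetical kernel object; real backends build layer classes from the operation's params.
struct DummyAddKernel : public onert::exec::IFunction
{
  void run() override { /* elementwise add would go here */ }
};

class KernelGeneratorSketch : public onert::backend::basic::KernelGeneratorBase
{
public:
  using KernelGeneratorBase::KernelGeneratorBase;

  std::unique_ptr<onert::exec::FunctionSequence> generate(onert::ir::OperationIndex ind) override
  {
    auto ret = std::make_unique<onert::exec::FunctionSequence>();
    _graph.operations().at(ind).accept(*this); // dispatch to the matching visit() below
    ret->append(releaseFunction());            // collect whatever visit() stored in _return_fn
    return ret;
  }

private:
  void visit(const onert::ir::operation::BinaryArithmetic &) override
  {
    _return_fn = std::make_unique<DummyAddKernel>();
  }
};

} // namespace sketch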
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
+#define __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
+
+#include "Allocator.h"
+#include "IMemoryPlanner.h"
+
+namespace onert
+{
+namespace backend
+{
+
+class ITensor;
+
+namespace basic
+{
+
+class MemoryManager
+{
+public:
+ MemoryManager();
+ MemoryManager(const std::string);
+ virtual ~MemoryManager() = default;
+
+ void allocate(void);
+ uint8_t *getBuffer(const ir::OperandIndex &ind) const;
+ void deallocate(void) { _mem_alloc->release(); }
+
+ void claimPlan(const ir::OperandIndex &ind, uint32_t size);
+ void releasePlan(const ir::OperandIndex &ind);
+
+private:
+ IMemoryPlanner *createMemoryPlanner();
+ IMemoryPlanner *createMemoryPlanner(const std::string);
+
+private:
+ ir::OperandIndexMap<Block> _tensor_mem_map;
+ std::shared_ptr<IMemoryPlanner> _mem_planner;
+ std::shared_ptr<Allocator> _mem_alloc;
+};
+
+class DynamicMemoryManager
+{
+public:
+ DynamicMemoryManager() = default;
+ virtual ~DynamicMemoryManager() = default;
+
+ std::shared_ptr<Allocator> allocate(const ITensor *tensor, uint32_t capacity);
+ void deallocate(const ITensor *tensor);
+ void deallocate(void);
+
+private:
+ std::unordered_map<const ITensor *, std::shared_ptr<Allocator>> _mem_alloc_map;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
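The managers above are two-phase: lifetimes are recorded through claimPlan/releasePlan, the planner computes offsets, and a single allocate() materializes one arena, after which getBuffer() resolves an operand to base plus its planned offset. A minimal lifecycle sketch, assuming the header is installed as backend/basic/MemoryManager.h like the other headers in this change:

#include <backend/basic/MemoryManager.h>
#include <ir/Index.h>

void memory_manager_sketch()
{
  onert::backend::basic::MemoryManager mgr;

  onert::ir::OperandIndex a{0}, b{1};
  mgr.claimPlan(a, 16); // record lifetimes first; a and b overlap here,
  mgr.claimPlan(b, 32); // so the planner must give them disjoint blocks
  mgr.releasePlan(a);
  mgr.releasePlan(b);

  mgr.allocate();                    // one arena for the whole plan
  uint8_t *a_buf = mgr.getBuffer(a); // base pointer + planned offset
  (void)a_buf;
  mgr.deallocate();
}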
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_STATICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_BASIC_STATICTENSOR_MANAGER_H__
+
+#include "backend/basic/DynamicTensorManager.h"
+#include "backend/basic/MemoryManager.h"
+#include "backend/basic/TensorRegistry.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandInfo.h"
+#include "TensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class DynamicTensorManager;
+
+class StaticTensorManager
+{
+public:
+ StaticTensorManager(const std::shared_ptr<TensorRegistry> ®,
+ DynamicTensorManager *dynamic_tensor_manager);
+ virtual ~StaticTensorManager() = default;
+
+ void allocateNonconsts(void);
+ void deallocateNonconsts(void);
+
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
+ ir::Layout backend_layout, bool as_const);
+
+ void claimPlan(const ir::OperandIndex &ind, uint32_t size);
+ void releasePlan(const ir::OperandIndex &ind);
+
+ void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
+
+private:
+ std::unique_ptr<MemoryManager> _nonconst_mgr;
+ const std::shared_ptr<TensorRegistry> _tensors;
+ ir::OperandIndexMap<bool> _as_constants;
+ DynamicTensorManager *_dynamic_tensor_manager;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_STATICTENSOR_MANAGER_H__
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_TENSOR_H__
+#define __ONERT_BACKEND_BASIC_TENSOR_H__
+
+#include "Allocator.h"
+
+#include <backend/IPortableTensor.h>
+#include <ir/OperandInfo.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class DynamicMemoryManager;
+
+class Tensor : public IPortableTensor
+{
+public:
+ Tensor() = delete;
+ virtual ~Tensor();
+
+public:
+ Tensor(const ir::OperandInfo &info, const ir::Layout layout,
+ DynamicMemoryManager *dynamic_mem_mgr)
+ : IPortableTensor(info), _layout(layout), _buffer(nullptr), _num_references(0),
+ _dynamic_mem_mgr(dynamic_mem_mgr), _allocator(nullptr)
+ {
+ // DO NOTHING
+ }
+
+public:
+ // Only one of the two 'setBuffer' overloads must be called, and only once
+
+ /**
+ * @brief Set the Buffer object. This method is called for static and non-const tensor
+ */
+ void setBuffer(uint8_t *buffer) { _buffer = buffer; }
+
+ /**
+ * @brief Set the Buffer object. This method is called for dynamic or const tensor
+ */
+ void setBuffer(const std::shared_ptr<Allocator> &alloc)
+ {
+ _allocator = alloc;
+ _buffer = alloc->base();
+ }
+
+ /**
+ * @brief Reset the buffer and deallocate the allocation if it is managed by itself
+ */
+ void deallocBuffer() override;
+
+public:
+ uint8_t *buffer() const override { return _buffer; }
+ /**
+ * @brief Get the total size of the tensor in bytes
+ */
+ size_t total_size() const override { return _info.total_size(); }
+ size_t calcOffset(const ir::Coordinates &coords) const override;
+ ir::Layout layout() const override { return _layout; }
+ ir::DataType data_type() const override { return _info.typeInfo().type(); }
+ bool is_constant() const override { return _info.isConstant(); }
+ bool is_dynamic() const override { return _info.isDynamic(); }
+ void set_dynamic() override { _info.setDynamic(); }
+ bool applyShape(const ir::Shape &new_shape) override;
+ const ir::Sparsity *sparsity() const override { return _info.typeInfo().sparsity(); }
+
+ virtual void increase_ref()
+ {
+ assert(is_dynamic() ||
+ // when not dynamic
+ (_buffer != nullptr));
+
+ ++_num_references;
+ }
+
+ virtual void decrease_ref()
+ {
+ assert(_buffer != nullptr || _allocator != nullptr);
+ assert(_num_references > 0);
+ --_num_references;
+ // constant and dynamic tensors own an _allocator
+ if (_num_references == 0)
+ {
+ if (_buffer != nullptr)
+ _buffer = nullptr;
+ if (_allocator != nullptr)
+ {
+ _allocator->release();
+ _allocator = nullptr;
+ }
+ }
+ }
+
+ /**
+ * @brief Reset reference count to zero and release data
+ */
+ virtual void reset_ref()
+ {
+ assert(_buffer != nullptr || _allocator != nullptr);
+ assert(_num_references > 0);
+ _num_references = 0;
+
+ // Only constant tensor has allocator pointer
+ if (_buffer != nullptr)
+ _buffer = nullptr;
+ else
+ {
+ _allocator->release();
+ _allocator = nullptr;
+ }
+ }
+
+ virtual int32_t num_references() { return _num_references; }
+
+ void setShape(const ir::Shape &new_shape) override;
+ ir::Shape getShape() const override;
+
+protected:
+ ir::Layout _layout;
+ uint8_t *_buffer;
+ int32_t _num_references;
+ DynamicMemoryManager *_dynamic_mem_mgr;
+
+private:
+ /**
+ * @brief Memory allocator for dynamic tensor and const tensor
+ * Since maintaining both _allocator and _buffer can be confusing,
+ * this code mainly uses _buffer (not _allocator.base()) as the memory pointer.
+ * _allocator (a shared_ptr) is kept to guarantee that _buffer stays valid.
+ */
+ std::shared_ptr<Allocator> _allocator;
+};
+
+/**
+ * @brief Class that uses data from external memory that is not managed by the backend,
+ * instead of allocating and copying the data. ExternalTensor's data pointer points to
+ * an address in memory that is already allocated or mmapped elsewhere.
+ * This means ExternalTensor can take any kind of ir::Data.
+ * To support this, the tensor is assumed to have no padding, an NHWC layout,
+ * and to be constant and not dynamic.
+ */
+class ExternalTensor : public Tensor
+{
+public:
+ ExternalTensor() = delete;
+ virtual ~ExternalTensor();
+
+public:
+ ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
+ : Tensor(info, layout, nullptr)
+ {
+ assert(_layout == ir::Layout::NHWC);
+ assert(_info.isConstant());
+ assert(_info.isDynamic() == false);
+ }
+
+public:
+ /**
+ * @brief Set Data shared from outside so that this ExternalTensor will not be
+ * allocated by the backend
+ * @param[in] data data of Operand to be set
+ */
+ void setData(const std::shared_ptr<ir::Data> data)
+ {
+ assert(data != nullptr);
+ _data = data;
+ // Note: some ops such as cker::Conv could take the buffer as nullptr.
+ // That is why _buffer is also set here.
+ _buffer = const_cast<uint8_t *>(_data->base());
+ }
+
+public:
+ uint8_t *buffer() const override { return _buffer; }
+
+ bool is_constant() const override { return true; }
+ bool is_dynamic() const override { return false; }
+ void set_dynamic() override
+ {
+ throw std::runtime_error("This tensor does not support changing dynamic");
+ }
+
+ void setShape(const ir::Shape &) override
+ {
+ throw std::runtime_error("This tensor does not support changing shape");
+ }
+
+ void increase_ref() override { ++_num_references; }
+
+ void decrease_ref() override
+ {
+ assert(_data != nullptr);
+ assert(_num_references > 0);
+ --_num_references;
+ if (_num_references == 0)
+ {
+ _data.reset();
+ _buffer = nullptr;
+ }
+ }
+
+ /**
+ * @brief Reset reference count to zero and release data
+ */
+ void reset_ref() override
+ {
+ assert(_data != nullptr);
+ assert(_num_references > 0);
+ _num_references = 0;
+
+ _data.reset();
+ _buffer = nullptr;
+ }
+
+ int32_t num_references() override { return _num_references; }
+
+private:
+ std::shared_ptr<const ir::Data> _data;
+};
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_TENSOR_H__
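To make the ownership rules above concrete, the sketch below exercises the Allocator-backed setBuffer overload (the path used for constant and dynamic tensors) and the reference counting that releases the storage when the last reference is dropped. The OperandInfo is taken as a parameter so the sketch does not depend on its constructors; passing nullptr as the dynamic memory manager is fine here because applyShape() is never called. Both headers are assumed to be installed under backend/basic/ like the others in this change.

#include <backend/basic/Tensor.h>
#include <backend/basic/Allocator.h>

#include <memory>

void tensor_refcount_sketch(const onert::ir::OperandInfo &info)
{
  using namespace onert::backend::basic;

  Tensor tensor{info, onert::ir::Layout::NHWC, nullptr};
  auto alloc = std::make_shared<Allocator>(static_cast<uint32_t>(info.total_size()));
  tensor.setBuffer(alloc); // tensor now shares ownership of the allocation

  tensor.increase_ref();
  tensor.decrease_ref(); // last reference: _buffer is cleared and the allocator released
}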
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_BASIC_TENSOR_BUILDER_H__
+
+#include <backend/basic/DynamicTensorManager.h>
+#include <backend/basic/TensorRegistry.h>
+#include <backend/basic/StaticTensorManager.h>
+
+#include <ir/OperandIndexMap.h>
+
+#include "Tensor.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class TensorBuilder
+{
+public:
+ TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
+
+ /**
+ * @brief Register tensor information to allocate on the backend
+ * @param[in] ind Operand index
+ * @param[in] info Operand information
+ * @param[in] layout Operand data layout
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout);
+
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+
+ void allocate(void);
+
+ DynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
+
+private:
+ const std::shared_ptr<TensorRegistry> _tensor_reg;
+ std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_TENSOR_BUILDER_H__
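TensorBuilder is what genTensors/planTensors above actually drive: registerTensorInfo routes an operand to the static or dynamic manager, the notify calls feed the memory plan, and allocate() finalizes static buffers, which are then reachable through the registry. A compressed sketch of that call order (the operand index and info are supplied by the caller, so no constructor details are assumed):

#include <backend/basic/TensorBuilder.h>
#include <backend/basic/TensorRegistry.h>

#include <memory>

void tensor_builder_sketch(const onert::ir::OperandIndex &ind, const onert::ir::OperandInfo &info)
{
  using namespace onert::backend::basic;

  auto reg = std::make_shared<TensorRegistry>();
  TensorBuilder builder{reg};

  builder.registerTensorInfo(ind, info, onert::ir::Layout::NHWC); // static vs. dynamic decided here
  builder.notifyFirstUse(ind); // claim in the memory plan
  builder.notifyLastUse(ind);  // release in the memory plan
  builder.allocate();          // materialize static, non-constant buffers

  auto tensor = reg->getNativeTensor(ind); // a static non-constant tensor should now have a buffer
  (void)tensor;
}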
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_TENSOR_REGISTRY__
+#define __ONERT_BACKEND_BASIC_TENSOR_REGISTRY__
+
+#include "backend/ITensorRegistry.h"
+#include "Tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+using TensorRegistry = PortableTensorRegistryTemplate<basic::Tensor>;
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_TENSOR_REGISTRY__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file       Allocator.h
- * @brief      This file contains Allocator related classes
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_ALLOCATOR_H__
-#define __ONERT_BACKEND_CPU_COMMON_ALLOCATOR_H__
-
-#include <memory>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-/**
- * @brief Class to allocate memory
- */
-class Allocator
-{
-public:
- Allocator(uint32_t capacity);
- /**
- * @brief Get memory base pointer
- * @return base pointer
- */
- uint8_t *base() const { return _base.get(); }
- void release() { _base.reset(); }
-
-private:
- std::unique_ptr<uint8_t[]> _base;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_ALLOCATOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
-#define __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
-
-#include <vector>
-
-#include "ir/Index.h"
-#include "ir/OpSequences.h"
-#include "ir/LowerInfoMap.h"
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-// TODO Remove the template param BackendContext once unification of cpu backend context is done
-template <typename T_BackendContext>
-void planTensors(const T_BackendContext &ctx, const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
-{
- auto graph = ctx.graph();
- auto tensor_builder = ctx.tensor_builder;
-
- ir::OperandIndexMap<uint32_t> uses_map;
- ir::OperandIndexMap<uint32_t> def_map;
- ir::OperandIndexSequence constants;
-
- auto model_io =
- (graph->getInputs() + graph->getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
-
- // Prepare scanning
- for (auto ind : ctx.operand_list())
- {
- if (model_io.contains(ind))
- continue;
- const auto &obj = graph->operands().at(ind);
- const auto &li = lower_info.operand.at(ind);
- if (li->def_factors().getOnlyElement().backend() != ctx.backend())
- continue;
-
- // Ignore unused tensor
- if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
- {
- VERBOSE_F() << "Operand #" << ind.value() << " will not be used. no more process."
- << std::endl;
- return;
- }
-
- uses_map[ind] = obj.getUses().size();
- def_map[ind] = obj.getDef().valid() ? 1 : 0;
-
- if (obj.isConstant())
- constants.append(ind);
-
- auto factor = li->def_factors().getOnlyElement();
- if (!tensor_builder->isRegistered(ind))
- {
- // These tensors do not exist in any op_seq (No use and def)
- const auto info = obj.info();
- const auto backend_layout = factor.layout();
- // TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, backend_layout);
- }
- }
-
- // Start scanning to do notify{First|Last}Use for each tensor
-
- // If a tensor is a constant, increase the use of the tensor and allocate it first.
- // Increasing the use count here ensures the tensor is never deallocated early, i.e. it will be
- // deallocated last.
- for (const auto &ind : constants)
- {
- uses_map[ind]++;
- tensor_builder->notifyFirstUse(ind);
- }
-
- // At each operation,
- // 1. Scan DEF of outputs. If the tensor is defined here, allocate it
- // 2. Scan DEF of inputs. If variable tensor, allocate it
- // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- for (const auto op_seq_ind : order)
- {
- const auto &op_seq = op_seqs.at(op_seq_ind);
- for (const auto &op_idx : op_seq.operations())
- {
- auto op_inputs = graph->operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED;
- auto op_outputs = graph->operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED;
-
- // Define outputs
- for (const auto &ind : op_outputs)
- {
- if (model_io.contains(ind))
- continue;
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(def_map.find(ind) != def_map.end());
- if (def_map[ind])
- {
- def_map[ind] = 0;
- tensor_builder->notifyFirstUse(ind);
- }
- }
-
- // Scan variable tensors
- // These tensors behave like constants, but OperandInfo and LowerInfo treat them as
- // non-constant so that the memory planning here uses less memory
- for (const auto &ind : op_inputs)
- {
- if (model_io.contains(ind))
- continue;
- if (!tensor_builder->isRegistered(ind))
- continue;
- const auto &operand = graph->operands().at(ind);
- if (operand.info().isVariable())
- {
- // The variable tensor with buffer is not supported yet
- assert(operand.data() == nullptr);
- assert(operand.getUses().size() == 1 && !operand.getDef().valid());
- assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
- lower_info.operand.at(ind)->use_factors().size() == 1);
- assert(uses_map[ind] == 1 && def_map[ind] == 0);
- tensor_builder->notifyFirstUse(ind);
- }
- }
-
- for (const auto &ind : op_inputs)
- {
- if (model_io.contains(ind))
- continue;
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(uses_map.find(ind) != uses_map.end());
- assert(uses_map[ind] > 0);
- uses_map[ind]--;
- if (uses_map[ind] == 0)
- {
- // plan for deallocation of static tensor
- tensor_builder->notifyLastUse(ind);
-
- // plan for deallocation of dynamic tensor
- auto dyn_tensor_manager = tensor_builder->dynamicTensorManager();
- auto *tensor = ctx.tensor_registry->getITensor(ind);
- assert(tensor);
- dyn_tensor_manager->planDealloc(op_idx, tensor);
- }
- }
- }
- }
-
- // Dispose and validate
- for (const auto &ind : constants)
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder->notifyLastUse(ind);
- }
- }
-
- assert(
- std::all_of(uses_map.begin(), uses_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-
- assert(
- std::all_of(def_map.begin(), def_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
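
The planTensors helper above is essentially a liveness pass driven by use/def counts: outputs are claimed on their first definition, and inputs are released when their remaining use count reaches zero. A minimal standalone sketch of that bookkeeping, using plain integers and hypothetical names instead of onert types:

#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

int main()
{
  // Hypothetical graph: tensor id -> number of remaining uses.
  std::map<int, uint32_t> uses_map{{0, 2}, {1, 1}, {2, 1}};

  // Operations in topological order; each entry lists input and output tensor ids.
  struct Op
  {
    std::vector<int> inputs, outputs;
  };
  std::vector<Op> order;
  order.push_back({{0}, {1}});
  order.push_back({{0, 1}, {2}});
  order.push_back({{2}, {}});

  for (const auto &op : order)
  {
    // "Define" outputs: the first time a tensor is produced, claim its memory.
    for (int out : op.outputs)
      std::cout << "notifyFirstUse(" << out << ")\n";

    // Consume inputs: when the last use is seen, the tensor can be released.
    for (int in : op.inputs)
      if (--uses_map[in] == 0)
        std::cout << "notifyLastUse(" << in << ")\n";
  }
}
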
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
-#define __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
-
-#include "TensorRegistry.h"
-
-#include "ConstantInitializerBase.h"
-#include <ir/Operands.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class ConstantInitializer : public ConstantInitializerBase
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg);
-
-public:
- void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
-
- // TODO: For now, only the cpu backend supports constant tensors that use external data.
- // If other backends need this as well (ExternalTensor would have to be abstracted,
- // e.g. as an IExternal interface), this could become part of
- // cpu_common::ConstantInitializerBase
- void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &);
-
-private:
- std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
-
-private:
- std::shared_ptr<ITensorRegistry> _tensor_reg;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__
-#define __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__
-
-#include <unordered_map>
-#include <functional>
-
-#include "ir/Coordinates.h"
-#include "ir/Layout.h"
-#include "ir/Operand.h"
-#include "ir/Operands.h"
-#include "ir/OperationVisitor.h"
-#include "ir/OpSequence.h"
-#include "backend/ITensorRegistry.h"
-#include "util/logging.h"
-#include "backend/ITensorRegistry.h"
-
-namespace
-{
-template <typename T>
-static void Init(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj, const bool copy,
- const onert::ir::Layout frontend_layout = onert::ir::Layout::UNKNOWN)
-{
- const auto shape = model_obj.shape();
- assert(model_obj.data());
- auto base = reinterpret_cast<const T *>(model_obj.data()->base());
-
- obj.access([&](::onert::backend::ITensor &tensor) {
- switch (shape.rank())
- {
- case 0:
- {
- assert(model_obj.data()->size() == sizeof(T));
- const auto value = *reinterpret_cast<const T *>(base);
- T *into = reinterpret_cast<T *>(tensor.buffer());
- *into = value;
- break;
- }
- case 1:
- {
- auto vec_size = shape.dim(0);
- for (int32_t n = 0; n < vec_size; ++n)
- {
- const T *from = reinterpret_cast<const T *>(base) + n;
- const auto value = *from;
-
- T *into = reinterpret_cast<T *>(tensor.buffer()) + n;
-
- *into = value;
- }
- break;
- }
- case 2:
- {
- const int32_t copy_len = shape.dim(1);
-
- for (auto i = 0; i < shape.dim(0); ++i)
- {
- ::onert::ir::Coordinates coords{i, 0};
- memcpy(tensor.buffer() + tensor.calcOffset(coords), base + i * copy_len,
- copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t width = shape.dim(1);
- const int32_t copy_len = shape.dim(2);
-
- for (auto i = 0; i < shape.dim(0); ++i)
- {
- for (auto j = 0; j < shape.dim(1); ++j)
- {
- ::onert::ir::Coordinates coords{i, j, 0};
- memcpy(tensor.buffer() + tensor.calcOffset(coords),
- base + i * width * copy_len + j * copy_len, copy_len * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- const int32_t height = shape.dim(1);
- const int32_t width = shape.dim(2);
- const int32_t copy_len = shape.dim(3);
- for (auto i = 0; i < shape.dim(0); ++i)
- {
- for (auto j = 0; j < shape.dim(1); ++j)
- {
- for (auto k = 0; k < shape.dim(2); ++k)
- {
- if (copy)
- {
- ::onert::ir::Coordinates coords{i, j, k, 0};
- memcpy(tensor.buffer() + tensor.calcOffset(coords),
- base + i * height * width * copy_len + j * width * copy_len + k * copy_len,
- copy_len * sizeof(T));
- }
- else
- {
- for (auto l = 0; l < shape.dim(3); ++l)
- {
- const auto coords = ::onert::ir::convertCoordinates({i, j, k, l}, frontend_layout,
- tensor.layout());
- T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset(coords));
- T value = *(base + i * height * width * copy_len + j * width * copy_len +
- k * copy_len + l);
- *into = value;
- }
- }
- }
- }
- }
- break;
- }
- default:
- throw std::runtime_error{"Not yet supported"};
- }
- });
-}
-
-template <typename T>
-void copyInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj)
-{
- Init<T>(model_obj, obj, true);
-}
-
-template <typename T>
-void permuteInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj,
- const onert::ir::Layout frontend_layout)
-{
- const bool copy = frontend_layout == obj.layout();
- Init<T>(model_obj, obj, copy, frontend_layout);
-}
-
-} // namespace
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class ConstantInitializerBase : public ir::OperationVisitor
-{
-public:
- virtual ~ConstantInitializerBase() = default;
-
-public:
- void run()
- {
- assert(tensor_registry());
- for (const auto &it : _init_map)
- {
- const auto &ind = it.first;
- const auto &fn = it.second;
-
- const auto &model_obj = _operands.at(ind);
- auto tensor_obj = tensor_registry()->getNativeITensor(ind);
- assert(tensor_obj != nullptr);
- fn(model_obj, *tensor_obj);
- VERBOSE(FillOperandData) << "Fill data for operand " << ind.value() << std::endl;
- }
- _init_map.clear();
- }
-
-public:
- ConstantInitializerBase(const ir::Operands &operands)
- : _operands{operands}, _current_layout{ir::Layout::UNKNOWN}
- {
- }
-
-public:
- using Initializer = std::function<void(const ir::Operand &, backend::ITensor &)>;
-
- void setLayout(ir::Layout layout) { _current_layout = layout; }
-
-protected:
- virtual std::shared_ptr<ITensorRegistry> tensor_registry() const = 0;
-
-public:
- virtual void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
- {
- registerPermuteInitializer(index, obj); // as default
- }
-
-public:
- void registerCopyInitializer(const ir::OperandIndex &index, const ir::Operand &obj);
- void registerPermuteInitializer(const ir::OperandIndex &index, const ir::Operand &obj);
-
-public:
- void registerCustomInitializer(const ir::OperandIndex &index, const ir::Operand &obj,
- void (*customInit)(const onert::ir::Operand &model_obj,
- onert::backend::ITensor &obj))
- {
- // For only CONSTANTS
- // TODO Add to check if tensor has been allocated
- if (!obj.isConstant())
- return;
-
- using namespace std::placeholders;
- _init_map[index] = std::bind(customInit, _1, _2);
- }
-
-public:
- bool exist(const ir::OperandIndex &ind) { return _init_map.find(ind) != _init_map.end(); }
-
-protected:
- const ir::Operands &_operands;
- std::unordered_map<ir::OperandIndex, Initializer> _init_map;
- ir::Layout _current_layout;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__
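
In the rank-4 branch of Init<T> above, the non-copy path rewrites each element through a layout-aware coordinate conversion before computing the destination offset. A standalone sketch of that idea, assuming an NHWC source and an NCHW destination (the offset helpers are illustrative, not onert's convertCoordinates):

#include <array>
#include <cstddef>
#include <iostream>
#include <vector>

// Flatten NHWC coordinates {n, h, w, c} into a linear offset. dims = {N, H, W, C}.
static size_t offsetNHWC(std::array<int, 4> d, int n, int h, int w, int c)
{
  return ((static_cast<size_t>(n) * d[1] + h) * d[2] + w) * d[3] + c;
}

// Flatten the same logical coordinates into an NCHW buffer.
static size_t offsetNCHW(std::array<int, 4> d, int n, int h, int w, int c)
{
  return ((static_cast<size_t>(n) * d[3] + c) * d[1] + h) * d[2] + w;
}

int main()
{
  std::array<int, 4> dims{1, 2, 2, 3}; // N, H, W, C
  std::vector<float> src(12), dst(12);
  for (size_t i = 0; i < src.size(); ++i)
    src[i] = static_cast<float>(i);

  // Element-wise permuting copy, analogous to the rank-4 "else" branch above.
  for (int h = 0; h < dims[1]; ++h)
    for (int w = 0; w < dims[2]; ++w)
      for (int c = 0; c < dims[3]; ++c)
        dst[offsetNCHW(dims, 0, h, w, c)] = src[offsetNHWC(dims, 0, h, w, c)];

  std::cout << "dst[0..3]: " << dst[0] << ' ' << dst[1] << ' ' << dst[2] << ' ' << dst[3] << '\n';
}
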
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_DYNAMICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_CPU_COMMON_DYNAMICTENSOR_MANAGER_H__
-
-#include "MemoryManager.h"
-#include "TensorRegistry.h"
-
-#include <backend/IDynamicTensorManager.h>
-#include <ir/OperandInfo.h>
-#include <ir/Operation.h>
-#include <ir/Index.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-// TODO Find optimized algorithm to manage memory.
-
-/**
- * @brief Class to manage dynamic tensor and its memory
- */
-class DynamicTensorManager : public backend::IDynamicTensorManager
-{
-public:
- DynamicTensorManager(const std::shared_ptr<TensorRegistry> ®);
-
- virtual ~DynamicTensorManager() = default;
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout);
-
- void planDealloc(ir::OperationIndex op_ind, backend::ITensor *tensor) override;
- void deallocInput(ir::OperationIndex op_ind) override;
-
- std::shared_ptr<DynamicMemoryManager> dynamic_mem_mgr() { return _dynamic_mem_mgr; }
-
-private:
- const ITensor *getRawITensor(ir::OperandIndex ind);
-
-private:
- /**
- * @brief Memory manager for dynamic tensor.
- * @todo DynamicMemoryManager is not optimized. Optimized one is needed
- */
- std::shared_ptr<DynamicMemoryManager> _dynamic_mem_mgr;
- const std::shared_ptr<TensorRegistry> _tensors;
-
- // contains list of dynamic tensor index, which can be deallocated after running operation
- // note: this map could contain static tensor index too. Careful use is required.
- std::unordered_map<ir::OperationIndex, std::unordered_set<backend::ITensor *>>
- _dealloc_tensor_map;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_DYNAMICTENSOR_MANAGER_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_IMEMORY_PLANNER_H__
-#define __ONERT_BACKEND_IMEMORY_PLANNER_H__
-
-#include "ir/OperandIndexMap.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-/**
- * @brief Structure to have memory offset and size
- */
-struct Block
-{
- uint32_t offset;
- size_t size;
-};
-
-/**
- * @brief Interface to plan memory
- */
-struct IMemoryPlanner
-{
- using MemoryPlans = ir::OperandIndexMap<Block>;
-
- /**
- * @brief Claim memory for operand
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- virtual void claim(const ir::OperandIndex &, size_t) = 0;
- /**
- * @brief Release memory for operand
- * @param[in] index The operand index
- */
- virtual void release(const ir::OperandIndex &) = 0;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- virtual uint32_t capacity() = 0;
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- virtual MemoryPlans &memory_plans() = 0;
-
- virtual ~IMemoryPlanner() = default;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_IMEMORY_PLANNER_H__
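
The IMemoryPlanner interface above only fixes the claim/release/capacity contract; concrete planners decide how offsets are packed. A minimal self-contained sketch in the spirit of a bump allocator (simplified, not the actual onert planner):

#include <cstdint>
#include <iostream>
#include <unordered_map>

struct Block
{
  uint32_t offset;
  size_t size;
};

// Simplest possible planner: every claim gets the next free offset, releases are ignored.
class BumpPlanner
{
public:
  void claim(int index, size_t size)
  {
    _plans[index] = Block{_capacity, size};
    _capacity += static_cast<uint32_t>(size);
  }
  void release(int /*index*/) { /* a bump planner never reuses memory */ }
  uint32_t capacity() const { return _capacity; }
  const std::unordered_map<int, Block> &memory_plans() const { return _plans; }

private:
  uint32_t _capacity = 0;
  std::unordered_map<int, Block> _plans;
};

int main()
{
  BumpPlanner planner;
  planner.claim(0, 16);
  planner.claim(1, 64);
  planner.release(0);
  planner.claim(2, 8);
  std::cout << "total capacity: " << planner.capacity() << '\n'; // 88
  for (const auto &p : planner.memory_plans())
    std::cout << "tensor " << p.first << " -> offset " << p.second.offset << '\n';
}
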
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__
-#define __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__
-
-#include <assert.h>
-#include <memory>
-#include <functional>
-
-#include "ir/OperationVisitor.h"
-#include "ir/OpSequence.h"
-#include <memory>
-#include "exec/FunctionSequence.h"
-#include "backend/ITensorRegistry.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class KernelGeneratorBase : public ir::OperationVisitor
-{
-public:
- virtual ~KernelGeneratorBase() = default;
-
- std::unique_ptr<exec::IFunction> releaseFunction()
- {
- assert(_return_fn);
- return std::move(_return_fn);
- }
-
- std::unique_ptr<exec::FunctionSequence> generate(const ir::OpSequence &op_seq)
- {
- op_seq.accept(*this);
- return std::move(_return_fn_seq);
- }
-
-protected:
- using OperationVisitor::visit;
-
- void visit(const ir::OpSequence &) override
- {
- throw std::runtime_error("KernelGenerator: NYI for operation 'OpSequence'");
- }
-
-#define OP(InternalName) \
- void visit(const ir::operation::InternalName &) override \
- { \
- throw std::runtime_error("KernelGenerator: NYI for operation '" #InternalName "'"); \
- }
-#include "ir/Operations.lst"
-#undef OP
-
-protected:
- std::unique_ptr<exec::IFunction> _return_fn;
- std::unique_ptr<exec::FunctionSequence> _return_fn_seq; // TODO Extract this out
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
-#define __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
-
-#include "Allocator.h"
-#include "backend/IMemoryManager.h"
-#include "IMemoryPlanner.h"
-
-namespace onert
-{
-namespace backend
-{
-
-class ITensor;
-
-namespace cpu_common
-{
-
-class MemoryManager : public backend::IMemoryManager
-{
-public:
- MemoryManager();
- MemoryManager(const std::string);
- virtual ~MemoryManager() = default;
-
- void allocate(void) override;
- uint8_t *getBuffer(const ir::OperandIndex &ind) const;
- void deallocate(void) override { _mem_alloc->release(); }
-
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
-private:
- IMemoryPlanner *createMemoryPlanner();
- IMemoryPlanner *createMemoryPlanner(const std::string);
-
-private:
- ir::OperandIndexMap<Block> _tensor_mem_map;
- std::shared_ptr<IMemoryPlanner> _mem_planner;
- std::shared_ptr<Allocator> _mem_alloc;
-};
-
-class DynamicMemoryManager
-{
-public:
- DynamicMemoryManager() = default;
- virtual ~DynamicMemoryManager() = default;
-
- std::shared_ptr<Allocator> allocate(const ITensor *tensor, uint32_t capacity);
- void deallocate(const ITensor *tensor);
- void deallocate(void);
-
-private:
- std::unordered_map<const ITensor *, std::shared_ptr<Allocator>> _mem_alloc_map;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__
-
-#include "backend/IStaticTensorManager.h"
-#include "backend/cpu_common/DynamicTensorManager.h"
-#include "backend/cpu_common/MemoryManager.h"
-#include "backend/cpu_common/TensorRegistry.h"
-#include "backend/ITensorManager.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperandInfo.h"
-#include "TensorRegistry.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class DynamicTensorManager;
-
-class StaticTensorManager : public backend::IStaticTensorManager
-{
-public:
- StaticTensorManager(const std::shared_ptr<TensorRegistry> ®,
- DynamicTensorManager *dynamic_tensor_manager);
- virtual ~StaticTensorManager() = default;
-
- void allocateNonconsts(void);
- void deallocateNonconsts(void);
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout, bool as_const);
-
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
- void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
-
-private:
- std::unique_ptr<MemoryManager> _nonconst_mgr;
- const std::shared_ptr<TensorRegistry> _tensors;
- ir::OperandIndexMap<bool> _as_constants;
- DynamicTensorManager *_dynamic_tensor_manager;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_TENSOR_H__
-#define __ONERT_BACKEND_CPU_COMMON_TENSOR_H__
-
-#include "Allocator.h"
-
-#include <backend/IPortableTensor.h>
-#include <ir/OperandInfo.h>
-#include <ir/Data.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class DynamicMemoryManager;
-
-class Tensor : public IPortableTensor
-{
-public:
- Tensor() = delete;
- virtual ~Tensor();
-
-public:
- Tensor(const ir::OperandInfo &info, const ir::Layout layout,
- DynamicMemoryManager *dynamic_mem_mgr)
- : IPortableTensor(info), _layout(layout), _buffer(nullptr), _num_references(0),
- _dynamic_mem_mgr(dynamic_mem_mgr), _allocator(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- // Exactly one of the two 'setBuffer' overloads must be called, and only once
-
- /**
- * @brief Set the Buffer object. This method is called for static, non-const tensors
- */
- void setBuffer(uint8_t *buffer)
- {
- assert(_buffer == nullptr);
- _buffer = buffer;
- }
-
- /**
- * @brief Set the Buffer object. This method is called for dynamic or const tensors
- */
- void setBuffer(const std::shared_ptr<Allocator> &alloc)
- {
- assert(_buffer == nullptr);
- _allocator = alloc;
- _buffer = alloc->base();
- }
-
- // This works just like setBuffer but simply overwrites the existing Allocator without a nullptr check
- void overwriteBuffer(const std::shared_ptr<Allocator> &alloc)
- {
- _allocator = alloc;
- _buffer = alloc->base();
- }
-
- /**
- * @brief Mark that this tensor no longer owns memory.
- * The actual deallocation must be done by the caller.
- */
- void resetBuffer()
- {
- _allocator.reset();
- _buffer = nullptr;
- }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
- /**
- * @brief Get dimension by index
- *
- * @param index Index of the dimension to get
- * @return size_t Dimension at index
- * @note N : dimension(0)
- * H : dimension(1)
- * W : dimension(2)
- * C : dimension(3)
- */
- size_t dimension(size_t index) const final override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
- size_t total_size() const override { return _info.total_size(); }
- size_t calcOffset(const ir::Coordinates &coords) const override;
- ir::Layout layout() const override { return _layout; }
- ir::DataType data_type() const override { return _info.typeInfo().type(); }
- float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
- bool is_constant() const override { return _info.isConstant(); }
- bool is_dynamic() const override { return _info.isDynamic(); }
- void set_dynamic() override { _info.setDynamic(); }
- bool applyShape(const ir::Shape &new_shape) override;
- const ir::Sparsity *sparsity() const override { return _info.typeInfo().sparsity(); }
-
- virtual void increase_ref()
- {
- assert(is_dynamic() ||
- // when not dynamic
- (_buffer != nullptr));
-
- ++_num_references;
- }
-
- virtual void decrease_ref()
- {
- assert(_buffer != nullptr || _allocator != nullptr);
- assert(_num_references > 0);
- --_num_references;
- // constant tensors and dynamic tensors have _allocator
- if (_num_references == 0)
- {
- if (_buffer != nullptr)
- _buffer = nullptr;
- if (_allocator != nullptr)
- {
- _allocator->release();
- _allocator = nullptr;
- }
- }
- }
-
- /**
- * @brief Reset reference count to zero and release data
- */
- virtual void reset_ref()
- {
- assert(_buffer != nullptr || _allocator != nullptr);
- assert(_num_references > 0);
- _num_references = 0;
-
- // Only constant tensor has allocator pointer
- if (_buffer != nullptr)
- _buffer = nullptr;
- else
- {
- _allocator->release();
- _allocator = nullptr;
- }
- }
-
- virtual int32_t num_references() { return _num_references; }
-
- void setShape(const ir::Shape &new_shape) override;
-
-protected:
- ir::Layout _layout;
- uint8_t *_buffer;
- int32_t _num_references;
- DynamicMemoryManager *_dynamic_mem_mgr;
-
-private:
- /**
- * @brief Memory allocator for dynamic and const tensors
- * Since maintaining both _allocator and _buffer would be confusing,
- * this code mainly uses _buffer (not _allocator.base()) as the memory pointer.
- * _allocator (a shared_ptr) is kept only to guarantee that _buffer stays valid.
- */
- std::shared_ptr<Allocator> _allocator;
-};
-
-/**
- * @brief Class that uses data from external memory not managed by a backend,
- * instead of allocating and copying the data. ExternalTensor's data pointer points to
- * memory that is already allocated, or to an mmapped area,
- * which means ExternalTensor can wrap any kind of ir::Data.
- * To support this, the following is assumed: no padding, always NHWC layout,
- * constant tensor, and not dynamic.
- */
-class ExternalTensor : public Tensor
-{
-public:
- ExternalTensor() = delete;
- virtual ~ExternalTensor();
-
-public:
- ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
- : Tensor(info, layout, nullptr)
- {
- assert(_layout == ir::Layout::NHWC);
- assert(_info.isConstant());
- assert(_info.isDynamic() == false);
- }
-
-public:
- /**
- * @brief set Data to be shared from external so that this ExternalTensor will not be
- * allocated on CPU backend
- * @param[in] data data of Operand to be set
- */
- void setData(const std::shared_ptr<ir::Data> data)
- {
- assert(data != nullptr);
- _data = data;
- // Note: some ops such as cker::Conv may receive a nullptr buffer.
- // That is why _buffer is also kept up to date here
- _buffer = const_cast<uint8_t *>(_data->base());
- }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
-
- bool is_constant() const override { return true; }
- bool is_dynamic() const override { return false; }
- void set_dynamic() override
- {
- throw std::runtime_error("This tensor does not support changing dynamic");
- }
-
- void setShape(const ir::Shape &) override
- {
- throw std::runtime_error("This tensor does not support changing shape");
- }
-
- void increase_ref() override { ++_num_references; }
-
- void decrease_ref() override
- {
- assert(_data != nullptr);
- assert(_num_references > 0);
- --_num_references;
- if (_num_references == 0)
- {
- _data.reset();
- _buffer = nullptr;
- }
- }
-
- /**
- * @brief Reset reference count to zero and release data
- */
- void reset_ref() override
- {
- assert(_data != nullptr);
- assert(_num_references > 0);
- _num_references = 0;
-
- _data.reset();
- _buffer = nullptr;
- }
-
- int32_t num_references() override { return _num_references; }
-
-private:
- std::shared_ptr<const ir::Data> _data;
-};
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_TENSOR_H__
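
Tensor's increase_ref/decrease_ref above implements manual reference counting in which the backing allocation is dropped when the count reaches zero. A standalone sketch of the same pattern (hypothetical RefCountedBuffer type, not onert's Tensor):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <memory>

class RefCountedBuffer
{
public:
  explicit RefCountedBuffer(size_t size) : _storage(new uint8_t[size]), _buffer(_storage.get()) {}

  void increase_ref() { ++_num_references; }

  void decrease_ref()
  {
    assert(_num_references > 0);
    if (--_num_references == 0)
    {
      // Last user gone: drop the allocation, mirroring Tensor::decrease_ref above.
      _storage.reset();
      _buffer = nullptr;
    }
  }

  uint8_t *buffer() const { return _buffer; }

private:
  std::unique_ptr<uint8_t[]> _storage;
  uint8_t *_buffer = nullptr;
  int32_t _num_references = 0;
};

int main()
{
  RefCountedBuffer buf(128);
  buf.increase_ref();
  buf.increase_ref();
  buf.decrease_ref();
  std::cout << "still allocated: " << std::boolalpha << (buf.buffer() != nullptr) << '\n'; // true
  buf.decrease_ref();
  std::cout << "still allocated: " << std::boolalpha << (buf.buffer() != nullptr) << '\n'; // false
}
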
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_TENSOR_REGISTRY__
-#define __ONERT_BACKEND_CPU_COMMON_TENSOR_REGISTRY__
-
-#include "backend/ITensorRegistry.h"
-#include "Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-using TensorRegistry = PortableTensorRegistryTemplate<cpu_common::Tensor>;
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_TENSOR_REGISTRY__
#include "ir/Operands.h"
#include "backend/Backend.h"
-#include "backend/controlflow/Backend.h"
+#include "backend/builtin/Backend.h"
namespace onert
{
public:
backend::Backend *get(const std::string &key);
const backend::Backend *get(const std::string &key) const;
- const backend::controlflow::Backend *getControlflow() const;
+ const backend::builtin::Backend *getBuiltin() const;
const std::vector<const backend::Backend *> getAll() const
{
std::vector<const backend::Backend *> v;
private:
std::map<std::string, std::unique_ptr<void, dlhandle_destroy_t>> _handle_map;
std::map<std::string, std::unique_ptr<backend::Backend, backend_destroy_t>> _gen_map;
- backend::controlflow::Backend *_controlflow{nullptr};
+ backend::builtin::Backend *_builtin{nullptr};
/**
- * @brief load controlflow backend
+ * @brief load builtin backend
*
* @param backend backend to be loaded
*
* @return
*/
- void loadControlflowBackend();
+ void loadBuiltinBackend();
};
} // namespace compiler
#define __ONERT_COMPILER_CODE_MAP_H__
#include <unordered_map>
+#include "ir/Index.h"
+#include "ir/Operation.h"
+#include "exec/FunctionSequence.h"
+#include "OperationLowerInfo.h"
namespace onert
{
struct CodeAndInfo
{
- const ir::OpSequence *op_seq;
- const ir::operation::LowerInfo *lower_info;
+ ir::OperationIndex op_ind;
+ const ir::Operation *op;
+ const OperationLowerInfo *lower_info;
std::unique_ptr<exec::FunctionSequence> fn_seq;
- CodeAndInfo(const ir::OpSequence *op_seq, const ir::operation::LowerInfo *lower_info,
+ CodeAndInfo(const ir::OperationIndex op_ind, const ir::Operation *op,
+ const OperationLowerInfo *lower_info,
std::unique_ptr<exec::FunctionSequence> &&fn_seq)
- : op_seq{op_seq}, lower_info{lower_info}, fn_seq{std::move(fn_seq)}
+ : op_ind{op_ind}, op{op}, lower_info{lower_info}, fn_seq{std::move(fn_seq)}
{
}
};
-using CodeMap = std::unordered_map<ir::OpSequenceIndex, CodeAndInfo>;
+using CodeMap = std::unordered_map<ir::OperationIndex, CodeAndInfo>;
} // namespace compiler
} // namespace onert
// OPTIONS ONLY FOR DEBUGGING/PROFILING
std::string trace_filepath; //< File path to save trace records
int graph_dump_level; //< Graph dump level, values between 0 and 2 are valid
- int op_seq_max_node; //< Number of nodes that can be
std::string executor; //< Executor name to use
ManualSchedulerOptions manual_scheduler_options; //< Options for ManualScheduler
bool he_scheduler; //< HEScheduler if true, ManualScheduler otherwise
State state(void) const { return _state; }
- /**
- * @brief Check if model can compile
- * @return @c true if model can compile, otherwise @c false
- * @note This method don't check model correctness,\n
- * so model verification should be done before calling this method
- */
- bool checkCompilable();
CompilerOptions &options() { return _options; }
/**
#include <memory>
-#include "ir/operation/LowerInfo.h"
-#include "ir/OpSequence.h"
+#include "ir/Index.h"
#include "exec/FunctionSequence.h"
#include "CodeMap.h"
class ExecutionBuilder
{
public:
- void append(const ir::OpSequenceIndex index, CodeAndInfo &&code_and_info)
+ void append(const ir::OperationIndex index, CodeAndInfo &&code_and_info)
{
_code_map.emplace(index, std::move(code_and_info));
}
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_GRAPH_LOWER_INFO_H__
+#define __ONERT_COMPILER_GRAPH_LOWER_INFO_H__
+
+#include <memory>
+#include <unordered_map>
+
+#include "compiler/OperandLowerInfo.h"
+#include "compiler/OperationLowerInfo.h"
+#include "util/ObjectManager.h"
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+struct GraphLowerInfo
+{
+ util::ObjectManager<ir::OperationIndex, OperationLowerInfo> operation;
+ util::ObjectManager<ir::OperandIndex, OperandLowerInfo> operand;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_GRAPH_LOWER_INFO_H__
* limitations under the License.
*/
-#ifndef __ONERT_IR_LOWERED_GRAPH_H__
-#define __ONERT_IR_LOWERED_GRAPH_H__
+#ifndef __ONERT_COMPILER_LOWERED_GRAPH_H__
+#define __ONERT_COMPILER_LOWERED_GRAPH_H__
#include "ir/Graph.h"
-#include "ir/LowerInfoMap.h"
-#include "ir/OpSequences.h"
+#include "compiler/GraphLowerInfo.h"
#include "compiler/BackendResolver.h"
#include "compiler/Compiler.h"
ir::Graph &graph() { return _graph; }
const ir::Graph &graph() const { return _graph; }
- const ir::LowerInfoMap *getLowerInfo() const { return &_lower_info_map; }
- const ir::operation::LowerInfo *getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const;
- void setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
- std::unique_ptr<ir::operation::LowerInfo> &&lower_info);
- void removeLowerInfo(const ir::OpSequenceIndex &op_seq_index);
- const ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index) const;
- ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index);
- void setLowerInfo(const ir::OperandIndex &index,
- std::unique_ptr<ir::operand::LowerInfo> &&lower_info);
- void removeLowerInfo(const ir::OperandIndex &index);
- ir::OpSequences &op_seqs() { return _op_seqs; }
- const ir::OpSequences &op_seqs() const { return _op_seqs; }
- void iterateTopolOpSeqs(
- const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const;
- void
- iterateTopolOpSeqs(const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn);
- const backend::BackendContexts &backend_contexts() { return _backend_contexts; }
- const backend::BackendContexts &backend_contexts() const { return _backend_contexts; }
+ const compiler::GraphLowerInfo &lower_info() const { return _lower_info_map; }
+ compiler::GraphLowerInfo &lower_info() { return _lower_info_map; }
std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
-private:
- void
- makeOpSequences(ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
- const compiler::CompilerOptions &options,
- const compiler::BackendResolver &backend_resolver);
+ void setHasDynamicTensor(ir::OperationIndex ind, bool val)
+ {
+ _has_dynamic_tensor_map.emplace(ind, val);
+ }
+ bool getHasDynamicTensor(ir::OperationIndex ind) const
+ {
+ auto itr = _has_dynamic_tensor_map.find(ind);
+ return (itr == _has_dynamic_tensor_map.end()) ? false : itr->second;
+ }
- void manipulateLowerInfo(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info);
+private:
+ void makeLowerInfo(const compiler::BackendResolver &backend_resolver);
void dumpLowerInfo();
- bool mergeable(const ir::OpSequenceIndex &op_seq_index, const ir::OperationIndex &node_index,
- ir::Layout layout, const compiler::BackendResolver &backend_resolver);
- ir::OpSequenceIndex appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
- const ir::Operation &node);
private:
ir::Graph _graph;
- backend::BackendContexts _backend_contexts;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
- ir::LowerInfoMap _lower_info_map;
- // Pass(for Perm) can accept only graph so that Graph has OpSequences as a member
- ir::OpSequences _op_seqs;
+ compiler::GraphLowerInfo _lower_info_map;
+ ir::OperationIndexMap<bool> _has_dynamic_tensor_map;
};
} // namespace compiler
} // namespace onert
-#endif // __ONERT_IR_LOWERED_GRAPH_H__
+#endif // __ONERT_COMPILER_LOWERED_GRAPH_H__
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_OPERAND_LOWER_INFO_H__
+#define __ONERT_COMPILER_OPERAND_LOWER_INFO_H__
+
+#include <functional>
+#include <stdint.h>
+
+#include "compiler/PermuteFactor.h"
+#include "util/Set.h"
+
+namespace onert
+{
+namespace backend
+{
+class Backend;
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+using PermuteFactorSet = util::Set<PermuteFactor>;
+
+class OperandLowerInfo
+{
+public:
+ OperandLowerInfo()
+ {
+ // DO NOTHING
+ }
+
+public:
+ const PermuteFactorSet &def_factors(void) const { return _def_factors; }
+ const PermuteFactorSet &use_factors(void) const { return _use_factors; }
+
+public:
+ void addDefPermuteFactor(const PermuteFactor &factor) { _def_factors.add(factor); }
+ void addUsePermuteFactor(const PermuteFactor &factor) { _use_factors.add(factor); }
+ void removeDefPermuteFactor(const PermuteFactor &factor) { _def_factors.remove(factor); }
+ void removeUsePermuteFactor(const PermuteFactor &factor) { _use_factors.remove(factor); }
+
+private:
+ PermuteFactorSet _def_factors;
+ PermuteFactorSet _use_factors;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_OPERAND_LOWER_INFO_H__
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_OPERATION_LOWER_INFO_H__
+#define __ONERT_COMPILER_OPERATION_LOWER_INFO_H__
+
+#include <string>
+
+#include <compiler/PermuteFactor.h>
+#include <ir/Layout.h>
+
+namespace onert
+{
+namespace backend
+{
+class Backend;
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+class OperationLowerInfo
+{
+public:
+ OperationLowerInfo(const backend::Backend *backend, ir::Layout layout);
+ const backend::Backend *backend() const { return _permute_factor.backend(); }
+ ir::Layout layout() const { return _permute_factor.layout(); }
+
+private:
+ PermuteFactor _permute_factor;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_OPERATION_LOWER_INFO_H__
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file PermuteFactor.h
+ * @brief This file contains PermuteFactor class
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __ONERT_COMPILER_OPERAND_PERMUTE_FACTOR_H__
+#define __ONERT_COMPILER_OPERAND_PERMUTE_FACTOR_H__
+
+#include <functional>
+
+#include "ir/Layout.h"
+
+namespace onert
+{
+namespace backend
+{
+class Backend;
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Class that has factors of permutation
+ */
+class PermuteFactor
+{
+public:
+ /**
+ * @brief Construct PermuteFactor object.
+ * @param backend The backend factor
+ * @param layout The layout factor
+ */
+ PermuteFactor(const backend::Backend *backend, ir::Layout layout)
+ : _backend{backend}, _layout{layout}
+ {
+ // DO NOTHING
+ }
+ /**
+ * @brief Construct PermuteFactor object by copy semantics.
+ */
+ PermuteFactor(const PermuteFactor &f) : _backend{f._backend}, _layout{f._layout}
+ {
+ // DO NOTHING
+ }
+ /**
+ * @brief Construct PermuteFactor object by move semantics.
+ */
+ PermuteFactor(PermuteFactor &&) = default;
+
+public:
+ /**
+ * @brief Get backend
+ *
+ * @return Backend factor
+ */
+ const backend::Backend *backend() const { return _backend; }
+ /**
+ * @brief Get layout
+ *
+ * @return Layout factor
+ */
+ ir::Layout layout() const { return _layout; }
+
+public:
+ /**
+ * @brief operator overloading function for `==`
+ *
+ * @return Whether the two PermuteFactors are the same
+ */
+ bool operator==(const PermuteFactor &other) const
+ {
+ return _backend == other.backend() && _layout == other.layout();
+ }
+ /**
+ * @brief operator overloading function for `!=`
+ *
+ * @return Whether the two PermuteFactors are different
+ */
+ bool operator!=(const PermuteFactor &other) const { return !(*this == other); }
+
+private:
+ const backend::Backend *_backend{nullptr};
+ ir::Layout _layout{ir::Layout::UNKNOWN};
+};
+
+} // namespace compiler
+} // namespace onert
+
+namespace std
+{
+
+/**
+ * @brief Structure that provides hash value of PermuteFactor
+ */
+template <> struct hash<onert::compiler::PermuteFactor>
+{
+ size_t operator()(const onert::compiler::PermuteFactor &factor) const noexcept
+ {
+ hash<const onert::backend::Backend *> b_hash{};
+ hash<onert::ir::Layout> l_hash{};
+ return b_hash(factor.backend()) ^ (l_hash(factor.layout()) << 1);
+ }
+};
+
+} // namespace std
+
+std::ostream &operator<<(std::ostream &os, const onert::compiler::PermuteFactor &obj);
+
+#endif // __ONERT_COMPILER_OPERAND_PERMUTE_FACTOR_H__
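
The std::hash specialization above combines the backend and layout hashes with an xor/shift so that PermuteFactor can key unordered containers such as PermuteFactorSet. A standalone sketch of the same pattern with plain fields (hypothetical Factor type standing in for the real class):

#include <functional>
#include <iostream>
#include <string>
#include <unordered_set>

struct Factor
{
  std::string backend; // stands in for const backend::Backend *
  int layout;          // stands in for ir::Layout

  bool operator==(const Factor &other) const
  {
    return backend == other.backend && layout == other.layout;
  }
};

namespace std
{
template <> struct hash<Factor>
{
  size_t operator()(const Factor &f) const noexcept
  {
    // Same combination scheme as the PermuteFactor hash above: h1 ^ (h2 << 1).
    return hash<string>{}(f.backend) ^ (hash<int>{}(f.layout) << 1);
  }
};
} // namespace std

int main()
{
  std::unordered_set<Factor> factors;
  factors.insert({"cpu", 0});
  factors.insert({"cpu", 0}); // duplicate, ignored
  factors.insert({"acl_neon", 1});
  std::cout << "unique factors: " << factors.size() << '\n'; // 2
}
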
#define __ONERT_COMPILER_STATIC_SHAPE_INFERER_H__
#include "ir/OperationVisitor.h"
-#include "ir/OpSequence.h"
#include "compiler/LoweredGraph.h"
#include "ir/Index.h"
{
public:
StaticShapeInferer(
- const ir::SubgraphIndex &subg_idx,
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- &lowered_subgs)
- : _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()),
- _operations(lowered_subgs.at(subg_idx)->graph().operations()),
- _return_has_dynamic_tensor(false)
+ const ir::SubgraphIndex &subg_idx,
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
+ &lowered_subgs)
+ : _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()),
+ _operations(lowered_subgs.at(subg_idx)->graph().operations()),
+ _return_has_dynamic_tensor(false)
{ /* empty */
}
virtual ~StaticShapeInferer() = default;
 * @brief Infer shape of operands belonging to ops and set the output shape.
* If output shape cannot be known without running op, mark it so that it can be allocated
* when running kernel.
- * @param op_seq sequence of operations
- * @return @c true if op_seq's input or output has any dynamic tensor; @c false otherwise.
+ * @param op Operation
+ * @return @c true if op's input or output has any dynamic tensor; @c false otherwise.
*/
- bool infer(const ir::OpSequence &op_seq);
+ bool infer(const ir::Operation &op);
void dump();
private:
+ void inferSubgraph(ir::SubgraphIndex subg_ind);
bool checkDynamicInput(const ir::Operation &op);
void setDynamicOutput(const ir::Operation &op);
private:
const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- &_lowered_subgs;
+ &_lowered_subgs;
// _operands and _operations can be changed by controlflow operation
ir::Operands &_operands; // operands of current subgraph
ir::Operations &_operations; // operations of current subgraph
#include "ir/Operands.h"
#include "ir/OperationVisitor.h"
#include "ir/Index.h"
-#include "backend/IDynamicTensorManager.h"
-#include "backend/ITensorManager.h"
#include "backend/ITensorRegistry.h"
#include <map>
public:
DynamicShapeInferer(const ir::Operands &operands,
const std::shared_ptr<backend::ITensorRegistry> &tensor_registry)
- : _operands(operands), _tensor_registry(tensor_registry)
+ : _operands(operands), _tensor_registry(tensor_registry)
{
UNUSED_RELEASE(_operands);
UNUSED_RELEASE(_tensor_registry);
*/
void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx);
+ // In case of an op's output tensor, it is possible that
+ // the output has become dynamic although it had been static before.
+ // Once a tensor becomes dynamic, it loses the memory that was allocated for the static case.
+ // Therefore, once an output is dynamic, it should be treated as a dynamic tensor (memory should be
+ // allocated at runtime). Here `previously` means that `dynamic` or `static` was set in a previous
+ // iteration of a WHILE loop or in a previous call of `nnfw_run()`
+ bool previously_static(backend::ITensor *op_output) { return !op_output->is_dynamic(); }
+
+ // Helper function that checks if the op's input is static.
+ // Note that the input of the n-th op has been set to static or dynamic by the (n-1)-th op,
+ // which is why it is called `currently_static`
+ bool currently_static(backend::ITensor *op_input) { return !op_input->is_dynamic(); }
+
private:
/**
* @brief To get operand-level info, e.g., ir::Operand::isConstant()
#include "exec/DynamicShapeInferer.h"
#include "ir/Operations.h"
#include "backend/ITensorRegistry.h"
-#include "backend/IDynamicTensorManager.h"
namespace onert
{
public: // methods related to dynamic tensor
struct DynamicTensorCtx
{
- const ir::OpSequence *op_seq = nullptr;
+ ir::OperationIndex op_ind;
const ir::Operations *operations = nullptr;
std::shared_ptr<exec::DynamicShapeInferer> dynamic_shape_inferer = nullptr;
- backend::IDynamicTensorManager *dynamic_tensor_manager = nullptr;
};
/**
namespace backend
{
class IPortableTensor;
-namespace controlflow
+namespace builtin
{
class IOTensor;
}
-}
-}
+} // namespace backend
+} // namespace onert
namespace onert
{
namespace exec
*
* @return Vector of @c IOTensor
*/
- virtual const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const = 0;
+ virtual const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const = 0;
};
using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>>;
InputDesc(void) = delete;
InputDesc(const ir::OperandInfo &info, const void *buffer, const size_t size, ir::Layout layout)
- : info(info), buffer(buffer), size(size), layout(layout)
+ : info(info), buffer(buffer), size(size), layout(layout)
{
}
};
OutputDesc(void) = delete;
OutputDesc(const ir::OperandInfo &info, void *buffer, const size_t size, ir::Layout layout)
- : info(info), buffer(buffer), size(size), layout(layout)
+ : info(info), buffer(buffer), size(size), layout(layout)
{
}
};
public:
MMapedData(int fd, const std::ptrdiff_t mmap_offset, const size_t mmap_size,
const std::ptrdiff_t data_offset, const size_t data_size)
- : ExternalData(nullptr, data_size),
- _mmap_base(
- static_cast<uint8_t *>(mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, mmap_offset))),
- _mmap_size(mmap_size), _offset(data_offset - mmap_offset)
+ : ExternalData(nullptr, data_size),
+ _mmap_base(
+ static_cast<uint8_t *>(mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, mmap_offset))),
+ _mmap_size(mmap_size), _offset(data_offset - mmap_offset)
{
// DO NOTHING
}
#include "ir/Operands.h"
#include "ir/Operations.h"
-#include "ir/OpSequence.h"
-#include "ir/OpSequences.h"
#include "ir/Subgraphs.h"
namespace onert
// Graph Building
public:
OperandIndex addOperand(const Shape &shape, const TypeInfo &type);
+ /**
+ * @brief Add an operand to the graph with the given index and object
+ *
+ * If the given index is available, it succeeds. And @c operand is moved which invalidates the
+ * caller's pointer. If the given index is already taken, it fails. And @c operand will not be
+ * moved, so the caller's pointer will still be valid.
+ *
+ * @param[in] index Index to be added
+ * @param[in] operand Operand to be added
+ * @return OperandIndex @c index if successful, Undefined otherwise
+ */
+ OperandIndex addOperand(OperandIndex index, std::unique_ptr<Operand> &&operand);
OperationIndex addOperation(std::unique_ptr<Operation> &&node);
+ /**
+ * @brief Add an operation to the graph with the given index and object
+ *
+ * If the given index is available, it succeeds. And @c operation is moved which invalidates the
+ * caller's pointer. If the given index is already taken, it fails. And @c operation will not be
+ * moved, so the caller's pointer will still be valid.
+ *
+ * @param index Index to be added
+ * @param operation Operation to be added
+ * @return OperationIndex @c index if successful, Undefined otherwise
+ */
+ OperationIndex addOperation(OperationIndex index, std::unique_ptr<Operation> &&operation);
void setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data);
void addInput(const OperandIndex &ind, const std::string &name = "");
void addOutput(const OperandIndex &ind, const std::string &name = "");
- void finishBuilding(void);
+ void verify(void);
void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
- bool isBuildingPhase(void) const { return _phase == Phase::BUILDING; }
void setLayout(Layout layout) { _layout = layout; }
void setSubgraphs(const std::shared_ptr<Subgraphs> &subgs) { _subgraphs = subgs; }
private:
+ bool checkOperandsForOperation(const Operation &operation);
+ void linkOperandToOperation(OperationIndex index, const Operation &operation);
void initializeUseDef();
+ // TODO Rename to `sweepUnusedOperands`
+ // TODO Make this public
void sweepGarbageOperands();
// Custom operations support
std::shared_ptr<Subgraphs> &subgraphs() { return _subgraphs; }
Layout layout() const { return _layout; }
+ // Topological sort
+public:
+ std::vector<ir::OperationIndex> topolSortOperations() const;
+
private:
- Phase _phase{Phase::BUILDING};
Operations _operations;
Operands _operands;
OperandIndexSequence _inputs;
#include "util/Index.h"
+#include <ostream>
+
namespace onert
{
namespace ir
struct IOIndexTag;
using IOIndex = ::onert::util::Index<uint32_t, IOIndexTag>;
-struct OpSequenceIndexTag;
-using OpSequenceIndex = ::onert::util::Index<uint32_t, OpSequenceIndexTag>;
-
struct SubgraphIndexTag;
using SubgraphIndex = ::onert::util::Index<uint32_t, SubgraphIndexTag>;
+template <typename IndexType>
+std::ostream &_index_print_impl(std::ostream &o, const std::string &prefix, IndexType index)
+{
+ if (index.undefined())
+ return o << prefix << std::string("?");
+ else
+ return o << prefix << index.value();
+}
+
+inline std::ostream &operator<<(std::ostream &o, const OperationIndex &i)
+{
+ return _index_print_impl(o, "@", i);
+}
+
+inline std::ostream &operator<<(std::ostream &o, const OperandIndex &i)
+{
+ return _index_print_impl(o, "%", i);
+}
+
+inline std::ostream &operator<<(std::ostream &o, const IOIndex &i)
+{
+ return _index_print_impl(o, "IO", i);
+}
+
+inline std::ostream &operator<<(std::ostream &o, const SubgraphIndex &i)
+{
+ return _index_print_impl(o, "SUBGRAPH", i); // $ubgraph
+}
+
} // namespace ir
} // namespace onert
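
The new operator<< overloads above give each index type a distinct textual prefix ('@' for operations, '%' for operands) and print '?' when the index is undefined. A standalone sketch of the same printing scheme (simplified index type, not onert's util::Index):

#include <cstdint>
#include <iostream>
#include <limits>
#include <string>

// Simplified stand-in for onert::util::Index: the max value means "undefined".
struct SimpleIndex
{
  uint32_t value;
  bool undefined() const { return value == std::numeric_limits<uint32_t>::max(); }
};

static std::ostream &print_index(std::ostream &o, const std::string &prefix, SimpleIndex index)
{
  if (index.undefined())
    return o << prefix << "?";
  return o << prefix << index.value;
}

int main()
{
  SimpleIndex op{3};
  SimpleIndex operand{7};
  SimpleIndex unknown{std::numeric_limits<uint32_t>::max()};

  print_index(std::cout, "@", op) << '\n';      // @3
  print_index(std::cout, "%", operand) << '\n'; // %7
  print_index(std::cout, "@", unknown) << '\n'; // @?
}
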
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_LOWER_INFO_MAP_H__
-#define __ONERT_IR_LOWER_INFO_MAP_H__
-
-#include <memory>
-#include <unordered_map>
-
-#include "ir/operand/LowerInfo.h"
-#include "ir/operation/LowerInfo.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/Index.h"
-
-namespace onert
-{
-namespace ir
-{
-
-struct LowerInfoMap
-{
- std::unordered_map<OpSequenceIndex, std::unique_ptr<operation::LowerInfo>> op_seq;
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operand;
-};
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_LOWER_INFO_MAP_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OP_SEQUENCE_H__
-#define __ONERT_IR_OP_SEQUENCE_H__
-
-#include <vector>
-#include <string>
-#include <memory>
-
-#include "ir/Layout.h"
-#include "ir/Index.h"
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-
-class Operations;
-
-class OpSequence
-{
-public:
- explicit OpSequence(Layout layout);
- OpSequence(const OpSequence &) = delete;
-
-public:
- void accept(OperationVisitor &v) const;
-
-public:
- const OperandIndexSequence &getInputs() const { return _inputs; }
- const OperandIndexSequence &getOutputs() const { return _outputs; }
- void setInputs(const OperandIndexSequence &indexes) { _inputs = indexes; }
- void setOutputs(const OperandIndexSequence &indexes) { _outputs = indexes; }
- void replaceInputs(const OperandIndex &from, const OperandIndex &to)
- {
- _inputs.replace(from, to);
- }
- void replaceOutputs(const OperandIndex &from, const OperandIndex &to)
- {
- _outputs.replace(from, to);
- }
-
- void appendOperation(const OperationIndex &index) { _operations.emplace_back(index); }
-
- std::vector<OperationIndex> &operations(void) { return _operations; }
-
- const std::vector<OperationIndex> &operations(void) const { return _operations; }
-
- uint32_t size(void) const { return _operations.size(); }
-
-public:
- void remove(const OperationIndex &index);
-
- bool exist(const OperationIndex &index) const;
-
-public:
- Layout getLayout() const { return _layout; }
-
-public:
- std::vector<OperationIndex>::const_iterator begin() const { return _operations.begin(); }
- std::vector<OperationIndex>::const_iterator end() const { return _operations.end(); }
-
-public:
- /**
- * @brief Set @c true if any operation in this opSequence has dynamic input
- * or dynamic output;
- * @c false if all operations' inputs and outputs are static tensors
- */
- void has_dynamic_tensor(bool has_dynamic_tensor) { _has_dynamic_tensor = has_dynamic_tensor; }
- bool has_dynamic_tensor() const { return _has_dynamic_tensor; }
-
-private:
- OperandIndexSequence _inputs;
- OperandIndexSequence _outputs;
- std::vector<OperationIndex> _operations;
-
-private:
- Layout _layout;
- bool _has_dynamic_tensor;
-};
-
-std::string getStrFromOpSeq(const OpSequence &op_seq, const Operations &operations);
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OP_SEQUENCE_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OP_SEQUENCES_H__
-#define __ONERT_IR_OP_SEQUENCES_H__
-
-#include "ir/Index.h"
-#include "ir/OpSequence.h"
-#include "util/ObjectManager.h"
-
-namespace onert
-{
-namespace ir
-{
-
-/**
- * @brief Class that manages OpSequence objects
- */
-class OpSequences : public util::ObjectManager<OpSequenceIndex, OpSequence>
-{
-public:
- /**
- * @brief Create an instance of OpSequence with given op and push it to objects
- *
- * @param[in] op_idx Operation index that is emplaced
- * @param[in] layout OpSequence's layout
- * @return OpSequenceIndex
- */
- OpSequenceIndex emplace(const OperationIndex &op_index, Layout layout);
-
- /**
- * @brief Push an instance of OpSequence to objects
- *
- * @param[in] op_seq An instance of OpSequence
- * @return OpSequenceIndex
- */
- OpSequenceIndex emplace(std::unique_ptr<OpSequence> &&op_seq);
- /**
- * @brief Check if an operation does exist in any OpSequences
- *
- * @param operation_index Operation index to find
- * @return true If such operation exists in any OpSequences otherwise false
- */
- bool containsOperation(const OperationIndex &operation_index) const;
- /**
- * @brief Find an operation from all OpSequences
- *
- * @param operation_index Operation index to find
- * @return OpSequenceIndex Index of OpSequence that contains given operation index
- */
- OpSequenceIndex getOperation(const OperationIndex &operation_index) const;
- /**
- * @brief Remove an operation from OpSequence
- *
- * @param operation_index Operation index to be removed
- */
- void removeFromOpSequence(const OperationIndex &operation_index);
-
-private:
- void cacheSequenceIndex(const OpSequenceIndex &seq_index, const OperationIndex &op_index) const;
- OpSequenceIndex *findSequenceIndex(const OperationIndex &operation_index) const;
-
- OpSequenceIndex findOperation(const OperationIndex &operation_index) const;
- mutable std::unordered_map<OperationIndex, OpSequenceIndex> _seq_indexes;
-};
-
-/**
- * @brief Dump OpSequences
- *
- * @param op_seqs Operation Sequences
- * @param operations Operation context
- */
-void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations);
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OP_SEQUENCES_H__
{
public:
explicit Operand(const Shape &shape, const TypeInfo &type)
- : _info{shape, type, MemAllocType::STATIC}
+ : _info{shape, type, MemAllocType::STATIC}
{
// DO NOTHING
}
void removeUse(const OperationIndex &idx);
void setDef(const OperationIndex &idx);
void unsetDef();
+ void clearDefUse();
public:
void type(const DataType type) { _info.type(type); };
public:
OperandIndexSequence operator+(const OperandIndexSequence &other) const;
- friend std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &op_seq);
+ friend std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &operand_seq);
public:
std::vector<OperandIndex>::const_iterator begin(void) const { return _vec.begin(); }
*/
OperandInfo(const Shape &shape, const TypeInfo &typeInfo, MemAllocType alloc_type,
bool is_const = false, bool is_variable = false)
- : _shape(shape), _typeInfo(typeInfo), _alloc_type(alloc_type), _const(is_const),
- _variable(is_variable)
+ : _shape(shape), _typeInfo(typeInfo), _alloc_type(alloc_type), _const(is_const),
+ _variable(is_variable)
{
// DO NOTHING
}
#define __ONERT_IR_OPERATION_VISITOR_H__
#include "ir/Operations.Include.h"
-#include "ir/OpSequence.h"
namespace onert
{
virtual void visit(const operation::InternalName &) {}
#include "ir/Operations.lst"
#undef OP
-
- // This OpSequence node should be handled specially so that
- // Op.lst doesn't have OpSequence
- // TODO Remove by pushing it down to derived classes.
- virtual void visit(const OpSequence &)
- {
- throw std::runtime_error{
- "OperationVisitor: This does not privide visit function in OpSequence"};
- }
};
} // namespace ir
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
-*/
+ */
#ifndef __ONERT_IR_SHAPE_H__
#define __ONERT_IR_SHAPE_H__
* @param[in] width The width value
*/
FeatureShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
- : N{batch}, C{depth}, H{height}, W{width}
+ : N{batch}, C{depth}, H{height}, W{width}
{
// DO NOTHING
}
return rank() == 0 ? 1 : _dimensions.at(i);
}
+ // TODO Fix different behavior with const version
int32_t &dim(int i) { return _dimensions.at(i); }
/**
Shape permuteShape(const Shape &shape, Layout frontend_layout, Layout backend_layout);
/**
-* @brief Find out if tha rank in this shape is "maybe" unspecified.
-* Note that when rank == 0, shape could represent scalar or unspecified rank
-* \see https://developer.android.com/ndk/reference/struct/a-neural-networks-operand-type
-*/
+ * @brief Find out if the rank in this shape is "maybe" unspecified.
+ * Note that when rank == 0, shape could represent scalar or unspecified rank
+ * \see https://developer.android.com/ndk/reference/struct/a-neural-networks-operand-type
+ */
inline bool rankMaybeUnspecified(const ir::Shape &shape) { return (shape.rank() == 0); }
} // namespace ir
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
-*/
+ */
#ifndef __ONERT_IR_SPARSITY_H__
#define __ONERT_IR_SPARSITY_H__
Sparsity() = default;
Sparsity(std::vector<uint16_t> &&w1_segments, std::vector<uint16_t> &&w1_indices,
std::vector<int32_t> &&block_size)
- : _w1_segments(w1_segments), _w1_indices(w1_indices), _block_size(block_size)
+ : _w1_segments(w1_segments), _w1_indices(w1_indices), _block_size(block_size)
{
}
#ifndef __ONERT_IR_TYPEINFO_H__
#define __ONERT_IR_TYPEINFO_H__
+#include <cassert>
#include <cstdint>
#include <memory>
#include <vector>
namespace ir
{
+struct Quantization
+{
+ std::vector<float> scales;
+ std::vector<int32_t> zero_points;
+};
+
class TypeInfo
{
public:
TypeInfo() = delete;
- explicit TypeInfo(DataType type, float scale = 0, int32_t offset = 0)
- : _type(type), _scale(scale), _offset(offset), _sparsity(nullptr)
+ explicit TypeInfo(DataType type) : _type{type}, _sparsity{nullptr} {}
+
+ TypeInfo(DataType type, float scale, int32_t zero_point) : _type{type}, _sparsity{nullptr}
{
+ quantization(scale, zero_point);
}
public:
DataType type() const { return _type; }
- float scale() const { return _scale; }
- int32_t offset() const { return _offset; }
+ float scale() const
+ {
+ assert(_quant.scales.size() == 1);
+ return _quant.scales[0];
+ }
+ const std::vector<float> &scales() const { return _quant.scales; }
+ int32_t zero_point() const
+ {
+ assert(_quant.zero_points.size() == 1);
+ return _quant.zero_points[0];
+ }
+ const std::vector<int32_t> &zero_points() const { return _quant.zero_points; }
const ir::Sparsity *sparsity() const { return _sparsity.get(); }
+ void quantization(float scale, int32_t zero_point)
+ {
+ _quant.scales.resize(1);
+ _quant.scales[0] = scale;
+ _quant.zero_points.resize(1);
+ _quant.zero_points[0] = zero_point;
+ }
+ void quantization(std::vector<float> &&scales, std::vector<int32_t> &&zero_points)
+ {
+ _quant.scales = scales;
+ _quant.zero_points = zero_points;
+ }
void sparsity(std::shared_ptr<ir::Sparsity> sparsity) { _sparsity = sparsity; }
public:
private:
DataType _type;
- // for quantization
- float _scale;
- int32_t _offset;
- // for sparsity
+ ir::Quantization _quant;
std::shared_ptr<ir::Sparsity> _sparsity;
};
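A small sketch of the reworked quantization interface: per-tensor parameters keep the scalar accessors, while per-channel parameters go through the vector overloads. The DataType enumerators below are assumptions; any quantized type is handled the same way.

#include <gtest/gtest.h>
#include "ir/TypeInfo.h"

TEST(TypeInfo, quantization_sketch)
{
  using onert::ir::DataType;
  using onert::ir::TypeInfo;

  // Per-tensor quantization: the scalar accessors are valid
  TypeInfo per_tensor{DataType::QUANT_UINT8_ASYMM, 0.5f, 128};
  EXPECT_FLOAT_EQ(per_tensor.scale(), 0.5f);
  EXPECT_EQ(per_tensor.zero_point(), 128);

  // Per-channel quantization: use the vector setter/getters; calling scale() or
  // zero_point() here would trip the single-element assert
  TypeInfo per_channel{DataType::QUANT_INT8_SYMM};
  per_channel.quantization({0.1f, 0.2f}, {0, 0});
  EXPECT_EQ(per_channel.scales().size(), 2u);
  EXPECT_EQ(per_channel.zero_points().size(), 2u);
}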
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERAND_LOWER_INFO_H__
-#define __ONERT_IR_OPERAND_LOWER_INFO_H__
-
-#include <functional>
-#include <stdint.h>
-
-#include "ir/operand/PermuteFactor.h"
-#include "util/Set.h"
-
-namespace onert
-{
-namespace backend
-{
-class Backend;
-} // namespace backend
-} // namespace onert
-
-namespace onert
-{
-namespace ir
-{
-namespace operand
-{
-using PermuteFactorSet = util::Set<PermuteFactor>;
-
-class LowerInfo
-{
-public:
- LowerInfo()
- {
- // DO NOTHING
- }
-
-public:
- const PermuteFactorSet &def_factors(void) const { return _def_factors; }
- const PermuteFactorSet &use_factors(void) const { return _use_factors; }
-
-public:
- void addDefPermuteFactor(const PermuteFactor &factor) { _def_factors.add(factor); }
- void addUsePermuteFactor(const PermuteFactor &factor) { _use_factors.add(factor); }
- void removeDefPermuteFactor(const PermuteFactor &factor) { _def_factors.remove(factor); }
- void removeUsePermuteFactor(const PermuteFactor &factor) { _use_factors.remove(factor); }
-
-private:
- PermuteFactorSet _def_factors;
- PermuteFactorSet _use_factors;
-};
-
-} // namespace operand
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERAND_LOWER_INFO_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file PermuteFactor.h
- * @brief This file contains onert::ir::operand::PermuteFactor class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __ONERT_IR_OPERAND_PERMUTE_FACTOR_H__
-#define __ONERT_IR_OPERAND_PERMUTE_FACTOR_H__
-
-#include <functional>
-
-#include "ir/Layout.h"
-
-namespace onert
-{
-namespace backend
-{
-class Backend;
-} // namespace backend
-} // namespace onert
-
-namespace onert
-{
-namespace ir
-{
-namespace operand
-{
-
-/**
- * @brief Class that has factors of permutation
- */
-class PermuteFactor
-{
-public:
- /**
- * @brief Construct PermuteFactor object.
- * @param backend The backend factor
- * @param layout The layout factor
- */
- PermuteFactor(const backend::Backend *backend, Layout layout) : _backend{backend}, _layout{layout}
- {
- // DO NOTHING
- }
- /**
- * @brief Construct PermuteFactor object by copy semantics.
- */
- PermuteFactor(const PermuteFactor &f) : _backend{f._backend}, _layout{f._layout}
- {
- // DO NOTHING
- }
- /**
- * @brief Construct PermuteFactor object by move semantics.
- */
- PermuteFactor(PermuteFactor &&) = default;
-
-public:
- /**
- * @brief Get backend
- *
- * @return Backend factor
- */
- const backend::Backend *backend() const { return _backend; }
- /**
- * @brief Get layout
- *
- * @return Layout factor
- */
- Layout layout() const { return _layout; }
-
-public:
- /**
- * @brief operator overloading function for `==`
- *
- * @return Whether two PermuteFactor are the same
- */
- bool operator==(const PermuteFactor &other) const
- {
- return _backend == other.backend() && _layout == other.layout();
- }
- /**
- * @brief operator overloading function for `!=`
- *
- * @return Whether two PermuteFactor are differenct
- */
- bool operator!=(const PermuteFactor &other) const { return !(*this == other); }
-
-private:
- const backend::Backend *_backend{nullptr};
- Layout _layout{Layout::UNKNOWN};
-};
-
-} // namespace operand
-} // namespace ir
-} // namespace onert
-
-namespace std
-{
-
-/**
- * @brief Structure that provides hash value of PermuteFactor
- */
-template <> struct hash<onert::ir::operand::PermuteFactor>
-{
- size_t operator()(const onert::ir::operand::PermuteFactor &factor) const noexcept
- {
- hash<const onert::backend::Backend *> b_hash{};
- hash<onert::ir::Layout> l_hash{};
- return b_hash(factor.backend()) ^ (l_hash(factor.layout()) << 1);
- }
-};
-
-} // namespace std
-
-#endif // __ONERT_IR_OPERAND_PERMUTE_FACTOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOWER_INFO_H__
-#define __ONERT_IR_OPERATION_LOWER_INFO_H__
-
-#include <string>
-
-#include <ir/operand/PermuteFactor.h>
-
-namespace onert
-{
-namespace backend
-{
-class Backend;
-} // namespace backend
-} // namespace onert
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LowerInfo
-{
-public:
- LowerInfo(const backend::Backend *backend, Layout layout);
- const backend::Backend *backend() const { return _permute_factor.backend(); }
- Layout layout() const { return _permute_factor.layout(); }
-
-private:
- operand::PermuteFactor _permute_factor;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOWER_INFO_H__
CONFIG(NCNN_LAYOUT , std::string , "NCHW")
CONFIG(PROFILING_MODE , bool , "0")
CONFIG(USE_SCHEDULER , bool , "0")
-CONFIG(OP_SEQ_MAX_NODE , int , "0")
CONFIG(TRACE_FILEPATH , std::string , "")
CONFIG(FP16_ENABLE , bool , "0")
CONFIG(RUY_THREADS , int , "-1")
CONFIG(OP_BACKEND_ ## InternalName, std::string, "")
#include "ir/Operations.lst"
#undef OP
-
{
public:
InsufficientBufferSizeException(const std::string &msg)
- : OnertException{"InsufficientBufferSize", msg}
+ : OnertException{"InsufficientBufferSize", msg}
{
}
};
{
const auto end_time = std::chrono::steady_clock::now();
_timer_res =
- std::chrono::duration_cast<std::chrono::microseconds>(end_time - _start_time).count();
+ std::chrono::duration_cast<std::chrono::microseconds>(end_time - _start_time).count();
};
private:
*/
T value() const { return _index; }
- friend std::ostream &operator<<(std::ostream &o, const Index &t)
- {
- if (t.undefined())
- return o << std::string("undefined");
- else
- return o << t.value();
- }
-
private:
T _index;
};
#include <memory>
+#include "util/logging.h"
+
namespace onert
{
namespace util
template <typename Index, typename Object> class ObjectManager
{
public:
- ObjectManager() : _index_count{0u} {}
+ ObjectManager() : _next_index{0u} {}
public:
/**
- * @brief Create an object with args and put it in the container with a new Index for that
+ * @brief Create an object with args and put it in the container with a newly assigned @c Index
*
* @param[in] args Arguments for creating Operand object
- * @return Created index that is associated to the object
+ * @return Created index associated with the object if successful, an Undefined index otherwise
*/
template <class... Args> Index emplace(Args &&... args)
{
auto index = generateIndex();
+ if (!index.valid())
+ return index;
_objects.emplace(index, std::make_unique<Object>(std::forward<Args>(args)...));
return index;
}
/**
- * @brief Put object in the container with a new Index for that
+ * @brief Put the object in the container with the given index.
+ *
+ * It fails when the given index is already taken or @c index is Undefined.
*
* @param[in] object Object to be pushed
- * @return Created index that is associated to the object
+ * @param[in] index Index associated with the object
+ * @return @c index if successful, an Undefined index otherwise
+ */
+ Index push(std::unique_ptr<Object> &&object, Index index)
+ {
+ auto gen_index = tryIndex(index);
+ if (gen_index.valid())
+ _objects.emplace(gen_index, std::move(object));
+ return gen_index;
+ }
+ /**
+ * @brief Put the object in the container with a newly assigned index.
+ *
+ * It fails when it cannot generate a valid index.
+ *
+ * @param[in] object Object to be pushed
+ * @return The newly assigned index if successful, an Undefined index otherwise
*/
Index push(std::unique_ptr<Object> &&object)
{
- auto index = generateIndex();
- _objects.emplace(index, std::move(object));
+ auto gen_index = generateIndex();
+ if (gen_index.valid())
+ _objects.emplace(gen_index, std::move(object));
+ return gen_index;
+ }
+ /**
+ * @brief Set the object in the container with the given index.
+ *
+ * If the index is Undefined, it will fail.
+ * If the index is already taken, it will overwrite the content.
+ *
+ * @param[in] object Object to be pushed
+ * @param[in] index Index associated with the object
+ * @return @c index if successful, an Undefined index otherwise
+ */
+ Index set(Index index, std::unique_ptr<Object> &&object)
+ {
+ if (index.valid())
+ _objects[index] = std::move(object);
return index;
}
-
/**
* @brief Remove the object that is associated with the given index
*
/**
* @brief Get the object that is associated with the given index
*
+ * If such an object does not exist, it will throw @c std::out_of_range
+ *
* @param[in] index Index of the object to be returned
* @return Object
*/
/**
* @brief Get the object that is associated with the given index
*
+ * If such an object does not exist, it will throw @c std::out_of_range
+ *
* @param[in] index Index of the object to be returned
* @return Object
*/
Object &at(const Index &index) { return *(_objects.at(index)); }
+ /**
+ * @brief Get the object that is associated with the given index
+ *
+ * If such an object does not exist, it will return `nullptr`
+ *
+ * @param[in] index Index of the object to be returned
+ * @return Object
+ */
+ const Object *getRawPtr(const Index &index) const
+ {
+ auto itr = _objects.find(index);
+ if (itr == _objects.end())
+ return nullptr;
+ else
+ {
+ assert(itr->second != nullptr);
+ return itr->second.get();
+ }
+ }
+ /**
+ * @brief Get the object that is associated with the given index
+ *
+ * If such an object does not exist, it will return `nullptr`
+ *
+ * @param[in] index Index of the object to be returned
+ * @return Object The found object
+ */
+ Object *getRawPtr(const Index &index)
+ {
+ return const_cast<Object *>(
+ const_cast<const ObjectManager<Index, Object> *>(this)->getRawPtr(index));
+ }
/**
* @brief Get the object that is associated with the given index
*
auto it = _objects.find(index);
return it != _objects.end();
}
+ /**
+ * @brief Return the number of objects that the manager contains
+ *
+ * @return size_t Number of objects
+ */
+ size_t size() const { return _objects.size(); }
/**
* @brief Iterate over the container with given function
*
}
private:
- Index generateIndex() { return Index{_index_count++}; }
+ // Try assigning the given index
+ Index tryIndex(Index index)
+ {
+ if (!index.valid())
+ return index;
+ if (_objects.find(index) == _objects.end())
+ {
+ // If the given index does not exist, update the next index and return the index
+ if (index.value() >= _next_index)
+ _next_index = index.value() + 1;
+ return index;
+ }
+ else
+ {
+ // If the given index exists already, return a non-valid index
+ return Index{};
+ }
+ }
+
+ // Generate a new index with `_next_index`
+ Index generateIndex()
+ {
+ // No need to check if there is an entry with _next_index since
+ // _next_index is always ("the highest index in the object map" + 1)
+ if (Index{_next_index}.valid())
+ return Index{_next_index++};
+ else
+ return Index{};
+ }
protected:
std::unordered_map<Index, std::unique_ptr<Object>> _objects;
- uint32_t _index_count;
+ uint32_t _next_index;
};
} // namespace util
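A compact sketch of the new index semantics (auto-assigned emplace, explicit-index push, overwrite via set, and nullptr from getRawPtr for unknown indices). The Payload struct is invented purely for illustration.

#include <gtest/gtest.h>
#include <memory>
#include "ir/Index.h"
#include "util/ObjectManager.h"

TEST(ObjectManager, index_semantics_sketch)
{
  struct Payload
  {
    int v;
    explicit Payload(int x) : v{x} {}
  };
  onert::util::ObjectManager<onert::ir::OperandIndex, Payload> mgr;

  // emplace() assigns indices starting from 0
  auto i0 = mgr.emplace(10);
  EXPECT_EQ(mgr.at(i0).v, 10);

  // push() with an explicit index succeeds only when the slot is free
  auto i5 = mgr.push(std::make_unique<Payload>(50), onert::ir::OperandIndex{5});
  EXPECT_TRUE(i5.valid());
  EXPECT_FALSE(mgr.push(std::make_unique<Payload>(51), onert::ir::OperandIndex{5}).valid());

  // set() overwrites an existing slot; getRawPtr() returns nullptr for unknown indices
  mgr.set(i5, std::make_unique<Payload>(55));
  EXPECT_EQ(mgr.at(i5).v, 55);
  EXPECT_EQ(mgr.getRawPtr(onert::ir::OperandIndex{100}), nullptr);
  EXPECT_EQ(mgr.size(), 2u);
}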
#include "ir/Index.h"
#include "ir/Layout.h"
#include "ir/OperationVisitor.h"
-#include "backend/IDynamicTensorManager.h"
#include "backend/ITensor.h"
#include "backend/ITensorRegistry.h"
ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &input_true_shape,
const ir::Shape &input_false_shape);
-ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins_buf,
- const int32_t *sizes_buf);
+template <typename T>
+ir::Shape inferSliceShape(const ir::Shape &input_shape, const T *begins_buf, const T *sizes_buf);
ir::Shape inferSpaceToBatchNDShape(const ir::Shape &input_shape, const ir::Shape &block_shape_shape,
const ir::Shape &padding_shape, const int32_t *block_shape_buf,
uint32_t getSessionId() const { return _session_id; }
+ /**
+ * @brief Return true if more than one session exists
+ *
+ * @note This method is NOT thread-safe. Call it only in a thread-safe situation.
+ */
+ bool hasMultipleSessions() const { return _next_session_id > 1; }
+
/**
* @brief Set subgraph index of a graph
*/
{
std::unique_lock<std::mutex> lock{_session_id_mutex};
- static uint32_t next_session_id = 0;
- _session_id = next_session_id++;
+ _session_id = _next_session_id++;
}
private:
std::unordered_map<const ir::Graph *, ir::SubgraphIndex> _subgraph_indices;
uint32_t _session_id;
static std::mutex _session_id_mutex;
+ static uint32_t _next_session_id;
};
} // namespace util
#define __ONERT_UTIL_LOGGING_H__
#include <iostream>
+#include <cstring>
#include "util/ConfigSource.h"
static Context &ctx = Context::get();
+inline std::string decorated_name(const char *input)
+{
+ const int min_prefix = 16;
+ std::string prefix(input);
+ auto len_prefix = prefix.size();
+ if (len_prefix > min_prefix)
+ return "[" + prefix + "] ";
+ std::string spaces((min_prefix - len_prefix) / 2, ' ');
+ return (len_prefix % 2 ? "[ " : "[") + spaces + prefix + spaces + "] ";
+}
+
} // namespace logging
} // namespace util
} // namespace onert
#define VERBOSE(name) \
if (::onert::util::logging::ctx.enabled()) \
- std::cout << "[" << #name << "] "
+ std::cout << ::onert::util::logging::decorated_name(#name)
#define VERBOSE_F() \
if (::onert::util::logging::ctx.enabled()) \
- std::cout << "[" << __func__ << "] "
+ std::cout << ::onert::util::logging::decorated_name(__func__)
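A quick sketch of how the decorated prefix shows up in practice; output is emitted only when verbose logging is enabled in the runtime configuration, and the tag names are arbitrary.

#include <iostream>
#include <string>
#include "util/logging.h"

void logging_sketch()
{
  VERBOSE(ALLOC) << "short tags are centered inside a fixed-width bracket" << std::endl;
  VERBOSE(StaticTensorManager) << "long tags are printed as-is in brackets" << std::endl;

  // decorated_name() can also be called directly, e.g. for custom sinks
  std::cout << onert::util::logging::decorated_name("GRAPH") << "done" << std::endl;
}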
#define WHEN_LOG_ENABLED(METHOD) \
if (::onert::util::logging::ctx.enabled()) \
namespace backend
{
-void BackendContext::initialize(const std::vector<OperationInfo> &operation_list,
- const std::vector<ir::OperandIndex> &operand_list)
-{
- _operation_list = operation_list;
- _operand_list = operand_list;
-}
-
} // namespace backend
} // namespace onert
namespace backend
{
-ir::Shape ITensor::getShape() const
-{
- onert::ir::Shape shape(num_dimensions());
- for (uint32_t d = 0; d < num_dimensions(); d++)
- shape.dim(d) = dimension(d);
-
- return shape;
-}
+// `dynamic_cast` does not work across shared library boundaries on NDK.
+// Defining the destructor here makes it a key function, so `dynamic_cast` works across
+// dynamically loaded libraries.
+ITensor::~ITensor() {}
} // namespace backend
} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/Allocator.h"
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+Allocator::Allocator(uint32_t capacity)
+{
+ _base = std::make_unique<uint8_t[]>(capacity);
+
+ VERBOSE(ALLOC) << "allocation capacity: " << capacity << std::endl;
+ VERBOSE(ALLOC) << "base pointer: " << static_cast<void *>(_base.get()) << std::endl;
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/BackendContextHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/DynamicTensorManager.h"
+
+#include "util/logging.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> ®)
+ : _dynamic_mem_mgr{new DynamicMemoryManager()}, _tensors{reg}
+{
+ // DO NOTHING
+}
+
+void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
+ const ir::OperandInfo &tensor_info,
+ ir::Layout backend_layout)
+{
+ assert(_tensors->getNativeTensor(ind) == nullptr);
+ auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, _dynamic_mem_mgr.get());
+ _tensors->setNativeTensor(ind, std::move(tensor));
+}
+
+const ITensor *DynamicTensorManager::getRawITensor(ir::OperandIndex ind)
+{
+ auto ptr = _tensors->getITensor(ind);
+ assert(ptr);
+ return ptr;
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <backend/basic/MemoryManager.h>
+
+#include <cassert>
+
+#include "MemoryPlannerFactory.h"
+#include "util/ConfigSource.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()}
+{
+ // DO NOTHING
+}
+
+MemoryManager::MemoryManager(const std::string planner_id)
+ : _mem_planner{createMemoryPlanner(planner_id)}
+{
+ // DO NOTHING
+}
+
+basic::IMemoryPlanner *MemoryManager::createMemoryPlanner()
+{
+ auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER);
+ return basic::MemoryPlannerFactory::get().create(planner_id);
+}
+
+basic::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id)
+{
+ return basic::MemoryPlannerFactory::get().create(planner_id);
+}
+
+void MemoryManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
+{
+ _mem_planner->claim(ind, size);
+}
+
+void MemoryManager::releasePlan(const ir::OperandIndex &ind) { _mem_planner->release(ind); }
+
+void MemoryManager::allocate(void)
+{
+ _mem_alloc = std::make_shared<basic::Allocator>(_mem_planner->capacity());
+ assert(_mem_alloc->base());
+}
+
+uint8_t *MemoryManager::getBuffer(const ir::OperandIndex &ind) const
+{
+ assert(_mem_planner->memory_plans().find(ind) != _mem_planner->memory_plans().end());
+ const auto &mem_blk = _mem_planner->memory_plans().at(ind);
+ return _mem_alloc->base() + mem_blk.offset;
+}
+
+std::shared_ptr<basic::Allocator> DynamicMemoryManager::allocate(const ITensor *tensor,
+ uint32_t capacity)
+{
+ auto find = _mem_alloc_map.find(tensor);
+ if (find != _mem_alloc_map.end())
+ throw std::runtime_error("Cannot allocate memory for a tensor. It was already allocated.");
+
+ _mem_alloc_map[tensor] = std::make_shared<basic::Allocator>(capacity);
+ return _mem_alloc_map[tensor];
+}
+
+void DynamicMemoryManager::deallocate(const ITensor *tensor)
+{
+ auto find = _mem_alloc_map.find(tensor);
+ if (find == _mem_alloc_map.end())
+ throw std::runtime_error("Cannot find Allocator for the requested index");
+
+ find->second->release(); // explicitly erase memory
+ _mem_alloc_map.erase(find); // remove tensor and alloc
+}
+
+void DynamicMemoryManager::deallocate(void)
+{
+ for (auto &mem_alloc : _mem_alloc_map)
+ {
+ // Release memory buffer of mem_alloc
+ mem_alloc.second->release();
+ }
+
+ _mem_alloc_map.clear();
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
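A rough usage sketch of the static MemoryManager above: record all claims and releases first, then allocate once and resolve per-operand buffers. The operand indices and sizes are made up; "FirstFit" is one of the keys handled by MemoryPlannerFactory.

#include <cstdint>
#include <backend/basic/MemoryManager.h>
#include "ir/Index.h"

void memory_manager_sketch()
{
  using onert::ir::OperandIndex;

  onert::backend::basic::MemoryManager mgr{"FirstFit"};

  // Planning phase: operands with overlapping lifetimes get disjoint offsets
  mgr.claimPlan(OperandIndex{0}, 64);
  mgr.claimPlan(OperandIndex{1}, 128);
  mgr.releasePlan(OperandIndex{0});
  mgr.claimPlan(OperandIndex{2}, 32); // may reuse operand 0's region

  // Allocation phase: one backing buffer, per-operand offsets
  mgr.allocate();
  uint8_t *buf = mgr.getBuffer(OperandIndex{1});
  (void)buf;
}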
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MemoryPlanner.h"
+#include "util/logging.h"
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+void BumpPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+ Block blk{_capacity, size};
+ _mem_plans[ind] = blk;
+ _capacity += size;
+
+ VERBOSE(BP_PLANNER) << "CLAIM(" << ind << "): " << blk.offset << ", " << blk.size << std::endl;
+}
+
+void BumpPlanner::release(const ir::OperandIndex &ind)
+{
+ VERBOSE(BP_PLANNER) << "RELEASE(" << ind << "): "
+ << "NOTHING does" << std::endl;
+}
+
+// There are some assumptions for claiming memory(== making a reservation for memory).
+// 1. About _claim_table(std::map).
+// - The table's data structure is std::map so that it always sorts
+// value(OperandIndex) by key(base_offset).
+// - This claim() inserts key/value into _claim_table and the release() removes the key/value from
+// _claim_table.
+// - _claim_table shows the memory status at a certain point in time. Therefore,
+// - If _claim_table has an offset and a certain size at a certain point in time,
+// it means the place at the offset has been already claimed(== can't claim now. need to find
+// someplace new).
+// - If _claim_table doesn't have any element for an offset and a certain size at a certain
+// point in time, it means the place at the offset can be claimed.
+// 2. In the loop for _claim_table, we can assume the current claim_base_offset value is bigger than
+// the previous claim_base_offset.
+void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+ // Find the right position for claiming
+ uint32_t next_offset = 0;
+ for (auto &mem_claim : _claim_table)
+ {
+ auto claimed_base_offset = mem_claim.first;
+ auto claimed_size = _mem_plans[mem_claim.second].size;
+ if (next_offset + size <= claimed_base_offset)
+ {
+ break;
+ }
+ else
+ {
+ next_offset = claimed_base_offset + claimed_size;
+ }
+ }
+
+ // Now next_offset is set to the proper offset
+ _claim_table[next_offset] = ind;
+ _mem_plans[ind] = {next_offset, size};
+
+ VERBOSE(FF_PLANNER) << "claim(" << ind << "): [+" << next_offset << ", " << size << "sz]"
+ << std::endl;
+
+ if (_capacity < next_offset + size)
+ {
+ _capacity = next_offset + size;
+ }
+}
+
+void FirstFitPlanner::release(const ir::OperandIndex &ind)
+{
+ for (auto it = _claim_table.cbegin(); it != _claim_table.cend(); ++it)
+ {
+ if (it->second == ind)
+ {
+ uint32_t offset = it->first;
+ uint32_t index = ind.value();
+ uint32_t size = _mem_plans[ind].size;
+
+ _claim_table.erase(it);
+
+ VERBOSE(FF_PLANNER) << "release(" << index << "): [+" << offset << ", " << size << "sz]"
+ << std::endl;
+ return;
+ }
+ }
+ assert(!"Cannot release for given index. It has been not claimed or released already.");
+}
+
+WICPlanner::WICPlanner()
+ : _initialized(false), _capacity(0), _mem_plans(), _live_operands(), _interference_graph(),
+ _operands()
+{
+ // DO NOTHING
+}
+
+void WICPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+ _operands.emplace(size, ind);
+ _interference_graph[ind].insert(_interference_graph[ind].end(), _live_operands.cbegin(),
+ _live_operands.cend());
+ for (const auto &live_operand : _live_operands)
+ {
+ _interference_graph[live_operand].emplace_back(ind);
+ }
+ _live_operands.emplace(ind);
+
+ VERBOSE(WIC_PLANNER) << "claim(" << ind << "): [" << size << "sz]" << std::endl;
+}
+
+void WICPlanner::release(const ir::OperandIndex &ind)
+{
+ _live_operands.erase(ind);
+ VERBOSE(WIC_PLANNER) << "release(" << ind << ")" << std::endl;
+}
+
+/*
+ * Build memory plans using liveness and size of operands
+ * 1. Build interference graph at claim
+ * - Two operands interfere if they have overlapped live range
+ * 2. Sort operands in descending order of size
+ * - Use std::multimap to sort operands
+ * 3. Allocate memory block for sorted operands
+ * - Find free memory block which does not overlap with interfered operands
+ */
+void WICPlanner::buildMemoryPlans()
+{
+ for (const auto &operand : _operands)
+ {
+ uint32_t size = operand.first;
+ const ir::OperandIndex &ind = operand.second;
+ VERBOSE(WIC_PLANNER) << "build_plan(" << ind << "): [" << size << "sz]" << std::endl;
+
+ uint32_t next_offset = 0;
+ if (_interference_graph.count(ind))
+ {
+ // Find interfered memory plans and sort them by offset
+ std::multimap<uint32_t, uint32_t> interfered_plans;
+ for (const auto &interference : _interference_graph[ind])
+ {
+ if (_mem_plans.count(interference))
+ interfered_plans.emplace(_mem_plans[interference].offset, _mem_plans[interference].size);
+ }
+
+ // Find free memory block in first-fit manner
+ for (const auto &interfered_plan : interfered_plans)
+ {
+ auto claimed_base_offset = interfered_plan.first;
+ auto claimed_size = interfered_plan.second;
+ VERBOSE(WIC_PLANNER) << "interfere : [+" << claimed_base_offset << ", " << claimed_size
+ << "sz]" << std::endl;
+ if (next_offset + size <= claimed_base_offset)
+ {
+ break;
+ }
+ else if (next_offset < claimed_base_offset + claimed_size)
+ {
+ next_offset = claimed_base_offset + claimed_size;
+ }
+ }
+ }
+ else
+ {
+ VERBOSE(WIC_PLANNER) << "No interference" << std::endl;
+ }
+
+ _mem_plans[ind] = {next_offset, size};
+ VERBOSE(WIC_PLANNER) << "alloc(" << ind << "): [+" << next_offset << ", " << size << "sz]"
+ << std::endl;
+
+ if (_capacity < next_offset + size)
+ {
+ _capacity = next_offset + size;
+ }
+ }
+ _initialized = true;
+ _interference_graph.clear();
+ _operands.clear();
+}
+
+WICPlanner::MemoryPlans &WICPlanner::memory_plans()
+{
+ if (!_initialized)
+ buildMemoryPlans();
+ return _mem_plans;
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file       MemoryPlanner.h
+ * @brief      This file contains Memory Planning related classes
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__
+#define __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__
+
+#include <map>
+#include <vector>
+#include <unordered_set>
+#include <memory>
+
+#include "backend/basic/Allocator.h"
+#include "backend/basic/IMemoryPlanner.h"
+#include "ir/OperandIndexMap.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+/**
+ * @brief Class to plan memory by bump way
+ */
+class BumpPlanner : public IMemoryPlanner
+{
+public:
+ /**
+ * @brief Claim memory for operand by bump way
+ * @param[in] index The operand index
+ * @param[in] size The size of the memory
+ */
+ void claim(const ir::OperandIndex &, size_t) override;
+ /**
+ * @brief Release memory for operand by bump way
+ * @param[in] index The operand index
+ */
+ void release(const ir::OperandIndex &) override;
+ /**
+ * @brief Get capacity for memory planning
+ * @return The value of capacity
+ */
+ uint32_t capacity() override { return _capacity; }
+ /**
+ * @brief Get MemoryPlans
+ * @return MemoryPlans
+ */
+ MemoryPlans &memory_plans() override { return _mem_plans; }
+
+private:
+ uint32_t _capacity = 0;
+ MemoryPlans _mem_plans;
+};
+
+/**
+ * @brief Class to plan memory by firstfit way
+ */
+class FirstFitPlanner : public IMemoryPlanner
+{
+public:
+ /**
+ * @brief Claim memory for operand by firstfit way
+ * @param[in] index The operand index
+ * @param[in] size The size of the memory
+ */
+ void claim(const ir::OperandIndex &, size_t) override;
+ /**
+ * @brief Release memory for operand by firstfit way
+ * @param[in] index The operand index
+ */
+ void release(const ir::OperandIndex &) override;
+ /**
+ * @brief Get capacity for memory planning
+ * @return The value of capacity
+ */
+ uint32_t capacity() override { return _capacity; }
+ /**
+ * @brief Get MemoryPlans
+ * @return MemoryPlans
+ */
+ MemoryPlans &memory_plans() override { return _mem_plans; }
+
+private:
+ uint32_t _capacity = 0;
+ MemoryPlans _mem_plans;
+ // Use std::map because claim() assumes that _claim_table is sorted by uint32_t(base_offset)
+ std::map<uint32_t, ir::OperandIndex> _claim_table;
+};
+
+/**
+ * @brief Class to plan memory by Weighted Interval Color algorithm
+ */
+class WICPlanner : public IMemoryPlanner
+{
+public:
+ WICPlanner();
+
+ /**
+ * @brief Claim memory for operand by WIC algorithm
+ * @param[in] index The operand index
+ * @param[in] size The size of the memory
+ */
+ void claim(const ir::OperandIndex &, size_t) override;
+ /**
+ * @brief Release memory for operand by WIC algorithm
+ * @param[in] index The operand index
+ */
+ void release(const ir::OperandIndex &) override;
+ /**
+ * @brief Get capacity for memory planning
+ * @return The value of capacity
+ */
+ uint32_t capacity() override
+ {
+ if (!_initialized)
+ buildMemoryPlans();
+ return _capacity;
+ }
+ /**
+ * @brief Get MemoryPlans
+ * @return MemoryPlans
+ */
+ MemoryPlans &memory_plans() override;
+
+private:
+ void buildMemoryPlans();
+
+ bool _initialized;
+ uint32_t _capacity;
+ MemoryPlans _mem_plans;
+ std::unordered_set<ir::OperandIndex> _live_operands;
+ ir::OperandIndexMap<std::vector<ir::OperandIndex>> _interference_graph;
+ // Sort operands by descending order of size
+ std::multimap<uint32_t, ir::OperandIndex, std::greater<uint32_t>> _operands;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "MemoryPlanner.h"
+#include "ir/Index.h"
+
+TEST(Allocator, allocate_test)
+{
+ ::onert::backend::basic::Allocator allocator(1024);
+ ASSERT_NE(allocator.base(), nullptr);
+}
+
+TEST(BumpPlanner, claim_test)
+{
+ ::onert::backend::basic::BumpPlanner planner;
+
+ auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.claim(mem_idx, size);
+ auto mem_blk = planner.memory_plans()[mem_idx];
+ ASSERT_EQ(mem_blk.offset, expected_offset);
+ ASSERT_EQ(mem_blk.size, size);
+ };
+
+ claim(0, 10, 0);
+ claim(1, 20, 10);
+ claim(2, 30, 30);
+}
+
+TEST(FirstFitPlanner, claim_release_test)
+{
+ ::onert::backend::basic::FirstFitPlanner planner;
+
+ auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.claim(mem_idx, size);
+ auto mem_blk = planner.memory_plans()[mem_idx];
+ ASSERT_EQ(mem_blk.offset, expected_offset);
+ ASSERT_EQ(mem_blk.size, size);
+ };
+
+ auto release = [&planner](uint32_t index) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.release(mem_idx);
+ };
+
+ // 0 CLAIM - 10
+ claim(0, 10, 0);
+
+ // 1 CLAIM - 20
+ claim(1, 20, 10);
+
+ // 2 CLAIM - 30
+ claim(2, 30, 30);
+
+ // 0 RELEASE - 10
+ release(0);
+
+ // 3 CLAIM - 20
+ claim(3, 20, 60);
+
+ // 4 CLAIM - 5
+ claim(4, 5, 0);
+
+ // 5 CLAIM - 10
+ claim(5, 10, 80);
+
+ // 6 CLAIM - 5
+ claim(6, 5, 5);
+
+ // 2 RELEASE - 30
+ release(2);
+
+ // 7 CLAIM - 35
+ claim(7, 35, 90);
+
+ // 8 CLAIM - 10
+ claim(8, 10, 30);
+
+ // 4 RELEASE - 5
+ release(4);
+
+ // 9 CLAIM - 10
+ claim(9, 10, 40);
+
+ // 10 CLAIM - 10
+ claim(10, 10, 50);
+
+ // 6 RELEASE
+ release(6);
+
+ // 1 RELEASE
+ release(1);
+
+ // 8 RELEASE
+ release(8);
+
+ // 9 RELEASE
+ release(9);
+
+ // 10 RELEASE
+ release(10);
+
+ // 3 RELEASE
+ release(3);
+
+ // 5 RELEASE
+ release(5);
+
+ // 7 RELEASE
+ release(7);
+}
+
+TEST(WICPlanner, claim_release_test)
+{
+ ::onert::backend::basic::WICPlanner planner;
+
+ auto claim = [&planner](uint32_t index, size_t size) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.claim(mem_idx, size);
+ };
+
+ auto release = [&planner](uint32_t index) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.release(mem_idx);
+ };
+
+ auto verify = [&planner](uint32_t index, uint32_t size, uint32_t expected_offset) {
+ onert::ir::OperandIndex mem_idx(index);
+ auto mem_blk = planner.memory_plans()[mem_idx];
+ ASSERT_EQ(mem_blk.offset, expected_offset);
+ ASSERT_EQ(mem_blk.size, size);
+ };
+
+ auto capacity = [&planner](uint32_t expected_capacity) {
+ auto actual_capacity = planner.capacity();
+ ASSERT_EQ(actual_capacity, expected_capacity);
+ };
+
+ claim(0, 20);
+ claim(1, 5);
+ release(0);
+ claim(2, 10);
+ release(1);
+ claim(3, 10);
+ release(2);
+ claim(4, 10);
+ release(3);
+ claim(5, 20);
+ release(4);
+ claim(6, 20);
+ release(5);
+ release(6);
+
+ // VERIFY 0 - 0
+ verify(0, 20, 0);
+
+ // VERIFY 1 - 20
+ verify(1, 5, 20);
+
+ // VERIFY 2 - 0
+ verify(2, 10, 0);
+
+ // VERIFY 3 - 10
+ verify(3, 10, 10);
+
+ // VERIFY 4 - 20
+ verify(4, 10, 20);
+
+ // VERIFY 5 - 0
+ verify(5, 20, 0);
+
+ // VERIFY 6 - 20
+ verify(6, 20, 20);
+
+ // CAPACITY - 40
+ capacity(40);
+}
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MemoryPlannerFactory.h"
+
+#include "MemoryPlanner.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+MemoryPlannerFactory &MemoryPlannerFactory::get()
+{
+ static MemoryPlannerFactory instance;
+ return instance;
+}
+
+IMemoryPlanner *MemoryPlannerFactory::create(const std::string &key)
+{
+ if (key == "FirstFit")
+ {
+ return new FirstFitPlanner;
+ }
+ else if (key == "Bump")
+ {
+ return new BumpPlanner;
+ }
+ else if (key == "WIC")
+ {
+ return new WICPlanner;
+ }
+ return new FirstFitPlanner; // Default Planner
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
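A short sketch of the dispatch above: the known keys are "FirstFit", "Bump", and "WIC", anything else falls back to FirstFitPlanner, and the caller owns the returned planner. Wrapping it in a unique_ptr assumes IMemoryPlanner declares a virtual destructor, as onert interfaces usually do.

#include <memory>
#include "MemoryPlannerFactory.h" // internal header of the basic backend sources

void planner_factory_sketch()
{
  using onert::backend::basic::IMemoryPlanner;
  using onert::backend::basic::MemoryPlannerFactory;

  std::unique_ptr<IMemoryPlanner> planner{MemoryPlannerFactory::get().create("WIC")};
  std::unique_ptr<IMemoryPlanner> fallback{MemoryPlannerFactory::get().create("DoesNotExist")};
  (void)planner;
  (void)fallback;
}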
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__
+#define __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__
+
+#include "backend/basic/IMemoryPlanner.h"
+
+#include <string>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class MemoryPlannerFactory
+{
+public:
+ static MemoryPlannerFactory &get();
+
+private:
+ MemoryPlannerFactory() = default;
+
+public:
+ IMemoryPlanner *create(const std::string &key);
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/StaticTensorManager.h"
+
+#include "backend/basic/DynamicTensorManager.h"
+#include "backend/basic/Tensor.h"
+#include <util/logging.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> ®,
+ DynamicTensorManager *dynamic_tensor_manager)
+ : _nonconst_mgr{new MemoryManager()}, _tensors{reg}, _dynamic_tensor_manager{
+ dynamic_tensor_manager}
+{
+ // DO NOTHING
+}
+
+void StaticTensorManager::allocateNonconsts(void)
+{
+ _nonconst_mgr->allocate();
+
+ for (auto &pair : _tensors->native_tensors())
+ {
+ const auto &ind = pair.first;
+ auto tensor = pair.second.get();
+ if (!_as_constants[ind] && !tensor->is_dynamic())
+ {
+ auto *buffer = _nonconst_mgr->getBuffer(ind);
+ tensor->setBuffer(buffer);
+
+ VERBOSE(CPU_StaticTensorManager)
+ << "TENSOR " << ind << " : " << static_cast<void *>(buffer) << std::endl;
+ }
+ }
+}
+
+void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
+
+void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
+ const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
+ bool as_const)
+{
+ assert(!_tensors->getNativeTensor(ind));
+ if (as_const)
+ {
+ auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout);
+ _tensors->setNativeTensor(ind, std::move(tensor));
+ }
+ else
+ {
+ auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout,
+ _dynamic_tensor_manager->dynamic_mem_mgr().get());
+ _tensors->setNativeTensor(ind, std::move(tensor));
+ }
+ _as_constants[ind] = as_const;
+}
+
+void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
+{
+ assert(_tensors->getNativeTensor(ind));
+
+ // This method is called only when a tensor has proper shape
+ assert(!_tensors->getNativeTensor(ind)->is_dynamic());
+
+ if (!_as_constants[ind])
+ _nonconst_mgr->claimPlan(ind, size);
+}
+
+void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
+{
+ assert(_tensors->getNativeTensor(ind));
+
+ // This method is called only when a tensor has proper shape
+ assert(!_tensors->getNativeTensor(ind)->is_dynamic());
+
+ if (!_as_constants[ind])
+ _nonconst_mgr->releasePlan(ind);
+}
+
+void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
+{
+ for (const auto &it : _tensors->native_tensors())
+ fn(it.first);
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
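A sketch of the static tensor lifecycle this manager drives: build, record claim/release plans, then allocate. The TensorRegistry header path and Shape's initializer-list constructor are assumed from their uses elsewhere in this change.

#include <memory>
#include "backend/basic/DynamicTensorManager.h"
#include "backend/basic/StaticTensorManager.h"
#include "backend/basic/TensorRegistry.h"

void static_tensor_manager_sketch()
{
  using namespace onert;

  auto reg = std::make_shared<backend::basic::TensorRegistry>();
  backend::basic::DynamicTensorManager dyn_mgr{reg};
  backend::basic::StaticTensorManager static_mgr{reg, &dyn_mgr};

  ir::OperandIndex ind{0};
  ir::OperandInfo info{ir::Shape{1, 4}, ir::TypeInfo{ir::DataType::FLOAT32},
                       ir::MemAllocType::STATIC};
  static_mgr.buildTensor(ind, info, ir::Layout::NHWC, /*as_const=*/false);

  // Plans must be recorded before allocateNonconsts() assigns the real buffers
  static_mgr.claimPlan(ind, info.total_size());
  static_mgr.releasePlan(ind);
  static_mgr.allocateNonconsts();
}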
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/Tensor.h"
+
+#include "ir/DataType.h"
+#include "backend/basic/MemoryManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+Tensor::~Tensor() {}
+
+size_t Tensor::calcOffset(const ir::Coordinates &coords) const
+{
+ auto shape = getShape();
+ size_t rank = shape.rank();
+ rank = rank == 0 ? 1 : rank;
+ size_t offset = 0;
+ for (size_t i = 0; i < rank; ++i)
+ {
+ auto dim = shape.rank() == 0 ? 1 : shape.dim(i);
+ offset = offset * dim + coords[i];
+ }
+ offset *= sizeOfDataType(data_type());
+ return offset;
+}
+
+void Tensor::setShape(const ir::Shape &new_shape) { _info.shape(new_shape); }
+
+bool Tensor::applyShape(const ir::Shape &new_shape)
+{
+ bool previously_dynamic = is_dynamic();
+
+ auto allocTensorMem = [&]() {
+ auto capacity = total_size();
+ auto alloc = _dynamic_mem_mgr->allocate(this, capacity);
+ setBuffer(alloc);
+ };
+
+ if (!previously_dynamic || buffer() == nullptr)
+ {
+ // Always set shape - when buffer with same size was already allocated, shape could differ
+ setShape(new_shape);
+ set_dynamic();
+ allocTensorMem();
+ }
+ else
+ {
+ auto previous_size = total_size();
+ auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type());
+ if (previous_size != new_size)
+ {
+ _dynamic_mem_mgr->deallocate(this);
+
+ setShape(new_shape);
+ set_dynamic();
+ allocTensorMem();
+ }
+ else
+ { // when buffer with same size was already allocated, shape could differ
+ setShape(new_shape);
+ }
+ }
+ return true;
+}
+
+ir::Shape Tensor::getShape() const { return _info.shape(); }
+
+void Tensor::deallocBuffer()
+{
+ if (_allocator)
+ {
+ _buffer = nullptr;
+ _allocator.reset();
+ if (_dynamic_mem_mgr)
+ {
+ _dynamic_mem_mgr->deallocate(this);
+ }
+ }
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+// ExternalTensor
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+// `dynamic_cast` does not work across shared library boundaries on NDK.
+// Defining the destructor here makes it a key function, so `dynamic_cast` works across
+// dynamically loaded libraries.
+ExternalTensor::~ExternalTensor() {}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
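A sketch of dynamic reshaping with the Tensor above: applyShape() allocates through the DynamicMemoryManager on first use and reallocates only when the byte size changes. The DYNAMIC MemAllocType enumerator and Shape's initializer-list constructor are assumptions.

#include "backend/basic/MemoryManager.h"
#include "backend/basic/Tensor.h"

void dynamic_reshape_sketch()
{
  using namespace onert;

  backend::basic::DynamicMemoryManager dyn_mem_mgr;
  ir::OperandInfo info{ir::Shape{1, 4}, ir::TypeInfo{ir::DataType::FLOAT32},
                       ir::MemAllocType::DYNAMIC};
  backend::basic::Tensor tensor{info, ir::Layout::NHWC, &dyn_mem_mgr};

  tensor.applyShape(ir::Shape{2, 4}); // first use: allocates a 32-byte buffer
  tensor.applyShape(ir::Shape{4, 2}); // same byte size: shape updated, buffer kept
}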
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <backend/basic/TensorBuilder.h>
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout layout)
+{
+ _tensor_info_map.emplace(ind, info);
+
+ // The CPU backend supports only the NHWC layout
+ assert(layout == ir::Layout::NHWC);
+ if (info.isDynamic())
+ {
+ _dynamic_tensor_mgr->buildTensor(ind, info, layout);
+ }
+ else
+ {
+ _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
+ }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+ const auto tensor_info = _tensor_info_map.at(ind);
+
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ const auto size = tensor_info.total_size();
+ _static_tensor_mgr->claimPlan(ind, size);
+ }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ _static_tensor_mgr->releasePlan(ind);
+ }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_BACKEND_H__
+#define __ONERT_BACKEND_BUILTIN_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "KernelGenerator.h"
+#include "TensorBuilder.h"
+#include "Tensor.h"
+
+#include <backend/Backend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
+ {
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
+ // The ControlFlow backend may not build tensors for itself because its operations use
+ // tensors of other backends instead.
+ // But the backend still builds tensors in case the controlflow operation has a constant
+ // input or consecutive controlflow operations exist. We have to make sure they are not built
+ // later
+ // 1. Constant input
+ // These tensors cannot be dynamic tensors, so let's handle them as follows:
+ // - always skip copying
+ // - if it is operation's input in child subgraph: register "use" as constant input of the
+ // operations in child subgraph
+ // - if it is child subgraph's output: register "use" as constant input of the operations
+ // using it
+ // 2. Intermediate tensors of consecutive controlflow operations
+ // These tensors can be dynamic and are complicated to support without copying. But
+ // there has been no such case so far, so let's support it later
+ // TODO Remove TensorBuilder and ConstantInitializer
+ // TODO Support intermediate tensors of consecutive controlflow operations
+ auto tr = std::make_shared<TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
+ context->tensor_builder = tb;
+ context->kernel_gen = std::make_shared<KernelGenerator>(
+ *context->graph(), tb->dynamicTensorManager(), tr, context->external_context());
+ return context;
+ }
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_BACKEND_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "KernelGenerator.h"
+#include "backend/basic/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
+
+FunctionMap BackendContext::genKernels()
+{
+ FunctionMap ret;
+
+ for (auto op_ind : _data.op_order)
+ {
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
+ }
+
+ basic::initConsts(*this);
+
+ // NOTE For memory optimization, we want to free some operand data
+ const_cast<ir::Graph *>(graph())->operands().iterate(
+ [&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen},
+ _external_context(std::make_shared<ExternalContext>())
+ {
+ }
+
+ ITensorRegistry *genTensors() override;
+
+ FunctionMap genKernels() override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ void planTensors(const std::vector<onert::ir::OperationIndex> &order,
+ const compiler::GraphLowerInfo &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+ // NOTE A ruy context owns a thread pool, so creating multiple ruy contexts also creates
+ // duplicate thread pools
+ // TODO Create one ruy context per session
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Config.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+std::string Config::ID = "builtin";
+
+bool Config::initialize() { return true; }
+
+ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout frontend_layout)
+{
+ return frontend_layout;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_CONFIG_H__
+#define __ONERT_BACKEND_BUILTIN_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class Config : public IConfig
+{
+public:
+ static std::string ID;
+ std::string id() override { return ID; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return false; }
+ bool supportDynamicTensor() override
+ {
+ // TODO Make this backend to support dynamic tensor or not to build non-constant tensor
+ return true;
+ }
+ bool supportFP16() override { return false; }
+
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_CONFIG_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__
+#define __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__
+
+#include <backend/basic/ConstantInitializer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+using ConstantInitializer = basic::ConstantInitializer;
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__
+
+#include "TensorRegistry.h"
+#include "Tensor.h"
+
+#include <backend/basic/DynamicTensorManager.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+using DynamicTensorManager = basic::DynamicTensorManager;
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__
+
+#include <util/ConfigSource.h>
+
+#include <ruy/context.h>
+#include <ruy/context_get_ctx.h>
+#include <ruy/ctx.h>
+#include <ruy/tune.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+// TODO Unify this with cpu::ExternalContext
+class ExternalContext
+{
+private:
+ static const int kDefaultNumThreadpoolThreads = 1;
+
+public:
+ ExternalContext() : _ruy_context(std::make_unique<ruy::Context>())
+ {
+ setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
+ initPerThreadState();
+ }
+
+ void setMaxNumThreads(int max_num_threads)
+ {
+ const int target_num_threads =
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ _ruy_context->set_max_num_threads(target_num_threads);
+ }
+
+ ruy::Context *ruy_context() const { return _ruy_context.get(); }
+
+private:
+ void initPerThreadState()
+ {
+ // Initialize per-thread state.
+ const int thread_count = _ruy_context->max_num_threads();
+ auto ctx = ruy::get_ctx(_ruy_context.get());
+ ctx->EnsureThreadSpecificResources(thread_count);
+ for (int i = 0; i < thread_count; i++)
+ {
+ ctx->GetThreadSpecificTuningResolver(i)->SetTuning(ctx->explicit_tuning());
+ }
+ }
+
+private:
+ const std::unique_ptr<ruy::Context> _ruy_context;
+};
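+
+ // Usage sketch: the context is meant to be created once and shared so that only a single ruy
+ // thread pool exists. The thread count below is an arbitrary example value, and <memory> is
+ // assumed to be available through the includes above (the class itself already relies on it).
+ inline std::shared_ptr<ExternalContext> createSharedExternalContext()
+ {
+   auto context = std::make_shared<ExternalContext>();
+   // Override the RUY_THREADS configuration value that the constructor already applied
+   context->setMaxNumThreads(2);
+   return context;
+ }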
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IOTensor.h"
+
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+// `dynamic_cast` does not work across library boundaries on NDK
+// With this as a key function, `dynamic_cast` works across dynamically loaded libraries
+IOTensor::~IOTensor() {}
+
+IOTensor::IOTensor(const ir::OperandInfo &info, ir::Layout layout)
+ : IPortableTensor{info}, _orig_info{info}, _orig_layout{layout}
+{
+ setUserTensor(nullptr, 0);
+}
+
+void IOTensor::setTensor(IPortableTensor *tensor)
+{
+ assert(tensor);
+ assert(tensor != this);
+ // TODO Handle when layout was changed
+ assert(tensor->layout() == _orig_layout); // Changing layout is not considered yet
+ _user_tensor.reset();
+ _tensor = tensor;
+}
+
+void IOTensor::setUserTensor(uint8_t *buffer, size_t size)
+{
+ _user_tensor = std::make_unique<UserTensor>(_orig_info, _orig_layout, buffer, size);
+ _tensor = _user_tensor.get();
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__
+#define __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__
+
+#include "backend/IPortableTensor.h"
+#include "UserTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+/**
+ * @brief Tensor object that indirects to the tensor it is pointing to.
+ *
+ * A model I/O tensor could be two types.
+ *
+ * 1. @c UserTensor, if it is the primary graph
+ * 2. Any other derivative of @c IPortableTensor from another backend, otherwise
+ *
+ * To support these, this object indirects everything to the actual tensor pointer.
+ * Exceptionally if it is UserTensor, this class creates and manages it.
+ */
+class IOTensor : public IPortableTensor
+{
+public:
+ IOTensor(const ir::OperandInfo &info, ir::Layout layout);
+ ~IOTensor();
+
+public:
+ void setTensor(IPortableTensor *tensor);
+ void setUserTensor(uint8_t *buffer, size_t size);
+ ir::OperandInfo orig_info() const { return _orig_info; }
+ ir::Layout orig_layout() const { return _orig_layout; }
+
+public:
+ uint8_t *buffer() const override { return _tensor->buffer(); }
+ size_t total_size() const override { return _tensor->total_size(); }
+ size_t calcOffset(const ir::Coordinates &coords) const override
+ {
+ return _tensor->calcOffset(coords);
+ }
+ ir::Layout layout() const override { return _tensor->layout(); }
+ ir::DataType data_type() const override { return _tensor->data_type(); }
+ bool is_dynamic() const override
+ {
+ return _is_dynamic || _orig_info.isDynamic() || (_tensor && _tensor->is_dynamic());
+ }
+ void set_dynamic() override { _is_dynamic = true; }
+ ir::Shape getShape() const override { return _tensor->getShape(); }
+ void setShape(const ir::Shape &shape) override
+ {
+ // Workaround because IPortableTensor holds _info as its member
+ _info.shape(shape);
+ _tensor->setShape(shape);
+ }
+ bool is_constant() const override { return _tensor->is_constant(); }
+ bool applyShape(const ir::Shape &shape) override
+ {
+ // Workaround because IPortableTensor holds _info as its member
+ _info.shape(shape);
+ return _tensor->applyShape(shape);
+ }
+
+public:
+ void setShapeOfIPortableTensor(const ir::Shape &shape) { _info.shape(shape); }
+
+private:
+ const ir::OperandInfo _orig_info;
+ const ir::Layout _orig_layout;
+ bool _is_dynamic{false};
+ IPortableTensor *_tensor{nullptr}; //< The actual tensor that is indirected
+ std::unique_ptr<UserTensor> _user_tensor; //< If it is a user tensor, it is managed by this object
+};
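+
+ // Usage sketch: the two ways an IOTensor is typically bound before execution. The arguments are
+ // assumed to be provided by the caller; the helper below is illustrative only.
+ inline void bindIOTensor(IOTensor &io, IPortableTensor *backend_tensor, uint8_t *user_buffer,
+                          size_t user_size)
+ {
+   if (backend_tensor != nullptr)
+   {
+     // Indirect to a tensor owned by another backend (its layout must match the original layout)
+     io.setTensor(backend_tensor);
+   }
+   else
+   {
+     // Primary graph I/O: wrap the caller-owned buffer in a UserTensor managed by the IOTensor
+     io.setUserTensor(user_buffer, user_size);
+   }
+   // From here on, accessors such as buffer() and getShape() forward to the bound tensor
+ }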
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include <backend/BackendContext.h>
+#include <util/Utils.h>
+#include "kernel/IfLayer.h"
+#include "kernel/WhileLayer.h"
+#include "kernel/PermuteLayer.h"
+#include "exec/ExecutorBase.h"
+#include "exec/FunctionSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : basic::KernelGeneratorBase{graph}, _dyn_tensor_manager{dyn_tensor_manager},
+ _tensor_reg{tensor_reg}, _tensor_registries{}, _executor_map{nullptr}, _external_context{
+ external_context}
+{
+ UNUSED_RELEASE(_graph);
+ UNUSED_RELEASE(_tensor_registries);
+ UNUSED_RELEASE(_executor_map);
+}
+
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
+{
+ assert(_dyn_tensor_manager);
+ assert(_tensor_reg);
+
+ auto dyn_shape_inferer =
+ std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
+
+ auto ret = std::make_unique<exec::FunctionSequence>();
+
+ // Prepare to handle dynamic tensors later
+ auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
+ {
+ dyn_ctx->op_ind = ind;
+ dyn_ctx->operations = &_graph.operations();
+ dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
+
+ ret->dynamic_tensor_ctx(dyn_ctx);
+ }
+
+ auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ assert(_return_fn); // _return_fn must have been generated
+ ret->append(std::move(_return_fn));
+
+ return ret;
+}
+
+void KernelGenerator::visit(const ir::operation::If &node)
+{
+ const auto then_subg_index = node.param().then_subg_index;
+ const auto else_subg_index = node.param().else_subg_index;
+
+ std::vector<backend::IPortableTensor *> input_tensors;
+ for (const auto input_index : node.getInputs())
+ {
+ auto input_tensor = getPortableTensor(input_index);
+ input_tensors.emplace_back(input_tensor);
+ }
+
+ std::vector<backend::IPortableTensor *> output_tensors;
+ for (const auto output_index : node.getOutputs())
+ {
+ auto output_tensor = getPortableTensor(output_index);
+ output_tensors.emplace_back(output_tensor);
+ }
+
+ // IfLayer just takes the ExecutorMap instead of the then and else executors to avoid the
+ // complexity of creating executors recursively
+ const auto cond_tensor = input_tensors.front();
+ input_tensors.erase(input_tensors.begin());
+ auto fn = std::make_unique<::onert::backend::builtin::kernel::IfLayer>(
+ cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executor_map,
+ _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Permute &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ // Add PermuteLayer
+ std::vector<ITensor *> output_tensors{getTensor(output_index)};
+ std::vector<ITensor *> input_tensors{getTensor(input_index)};
+
+ auto fn =
+ std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, _external_context);
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::While &node)
+{
+ const auto cond_subg_index = node.param().cond_subg_index;
+ const auto body_subg_index = node.param().body_subg_index;
+
+ // This op does not support a constant as input, because the builtin backend does not have a
+ // TensorBuilder
+ std::vector<backend::IPortableTensor *> input_tensors;
+ for (const auto input_index : node.getInputs())
+ {
+ auto input_tensor = getPortableTensor(input_index);
+ input_tensors.emplace_back(input_tensor);
+ }
+
+ std::vector<backend::IPortableTensor *> output_tensors;
+ for (const auto output_index : node.getOutputs())
+ {
+ auto output_tensor = getPortableTensor(output_index);
+ output_tensors.emplace_back(output_tensor);
+ }
+
+ // WhileLayer just takes the ExecutorMap instead of the cond and body executors to avoid the
+ // complexity of creating executors recursively
+ auto fn = std::make_unique<::onert::backend::builtin::kernel::WhileLayer>(
+ input_tensors, output_tensors, cond_subg_index, body_subg_index, _executor_map,
+ _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index)
+{
+ // get Tensor from all tensor registries (for Permute op)
+ auto ret = _tensor_registries.getITensor(index);
+ assert(ret != nullptr);
+ return ret;
+}
+
+backend::IPortableTensor *KernelGenerator::getPortableTensor(const ir::OperandIndex &index)
+{
+ auto ret = _tensor_reg->getPortableTensor(index);
+ assert(ret != nullptr);
+ return ret;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
+
+#include "exec/IExecutor.h"
+#include "ExternalContext.h"
+#include "ir/Graph.h"
+#include "TensorBuilder.h"
+#include "compiler/TensorRegistries.h"
+#include "backend/basic/KernelGeneratorBase.h"
+#include "TensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class KernelGenerator : public basic::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void setTensorRegistries(const compiler::TensorRegistries &tensor_registries)
+ {
+ _tensor_registries = tensor_registries;
+ }
+ void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ {
+ // FIXME Using shared_ptr's raw pointer!
+ _executor_map = executor_map.get();
+ }
+
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+
+private:
+ void visit(const ir::operation::If &) override;
+ void visit(const ir::operation::Permute &) override;
+ void visit(const ir::operation::While &) override;
+
+private:
+ backend::ITensor *getTensor(const ir::OperandIndex &index);
+ backend::IPortableTensor *getPortableTensor(const ir::OperandIndex &index);
+
+private:
+ DynamicTensorManager *_dyn_tensor_manager;
+ std::shared_ptr<TensorRegistry> _tensor_reg;
+ compiler::TensorRegistries _tensor_registries;
+ exec::ExecutorMap *_executor_map;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_H__
+#define __ONERT_BACKEND_BUILTIN_TENSOR_H__
+
+#include <backend/basic/Tensor.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TENSOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorBuilder.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
+ _static_tensor_mgr{
+ new basic::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout)
+{
+ _tensor_info_map.emplace(ind, info);
+
+ VERBOSE_F() << "cpucommon REGISTER!! " << ind << std::endl;
+ if (info.isDynamic())
+ {
+ _dynamic_tensor_mgr->buildTensor(ind, info, backend_layout);
+ }
+ else
+ {
+ _static_tensor_mgr->buildTensor(ind, info, backend_layout, info.isConstant());
+ }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
+ return;
+
+ const auto tensor_info = _tensor_info_map.at(ind);
+
+ if (!nativeOwnTensorAt(ind)->is_dynamic())
+ {
+ const auto size = tensor_info.total_size();
+ _static_tensor_mgr->claimPlan(ind, size);
+ }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
+ return;
+
+ if (!nativeOwnTensorAt(ind)->is_dynamic())
+ {
+ _static_tensor_mgr->releasePlan(ind);
+ }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ // User tensors are not registered in _tensor_info_map, but objects for them do exist
+ // in the tensor registry.
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_reg->getITensor(ind))
+ return true;
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+DynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
+{
+ return _dynamic_tensor_mgr.get();
+}
+
+basic::Tensor *TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind)
+{
+ return _tensor_reg->getNativeOwnTensor(ind);
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__
+
+#include <backend/basic/StaticTensorManager.h>
+#include <backend/basic/TensorRegistry.h>
+#include <backend/basic/Tensor.h>
+
+#include <ir/OperandIndexMap.h>
+
+#include <unordered_map>
+
+#include "DynamicTensorManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class TensorBuilder
+{
+public:
+ TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
+
+ /**
+ * @brief Register tensor information to allocate on CPU backend
+ * @param[in] ind Operand index
+ * @param[in] info Operand information
+ * @param[in] layout Operand data layout
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout);
+
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+
+ void allocate(void);
+
+ DynamicTensorManager *dynamicTensorManager(void);
+
+ /**
+ * @brief Get tensor with a specific OperandIndex.
+ * @param ind OperandIndex for the tensor. There must exist a tensor with this ind.
+ * If not, the program will crash with an assert or an exception.
+ * @return operand::Tensor *
+ */
+ basic::Tensor *nativeOwnTensorAt(const ir::OperandIndex &ind);
+
+private:
+ const std::shared_ptr<TensorRegistry> _tensor_reg;
+ std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<basic::StaticTensorManager> _static_tensor_mgr;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+};
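+
+ // Usage sketch of the static-tensor lifecycle this builder drives. In the real pipeline these
+ // calls are issued according to operand liveness; the helper below is illustrative only and
+ // uses NHWC as an example layout.
+ inline basic::Tensor *buildOneStaticTensor(TensorBuilder &builder, const ir::OperandIndex &ind,
+                                            const ir::OperandInfo &info)
+ {
+   builder.registerTensorInfo(ind, info, ir::Layout::NHWC); // plan a static (or dynamic) tensor
+   builder.notifyFirstUse(ind); // claim space in the memory plan
+   builder.allocate();          // materialize the planned non-constant tensors
+   basic::Tensor *tensor = builder.nativeOwnTensorAt(ind); // has a real buffer if non-constant
+   builder.notifyLastUse(ind); // release the claim once the operand is no longer needed
+   return tensor;
+ }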
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__
+
+#include "backend/basic/TensorRegistry.h"
+#include "backend/ITensorRegistry.h"
+#include "Tensor.h"
+#include "IOTensor.h"
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+/**
+ * @brief Tensor registry class for builtin backend
+ *
+ * This class contains three types of tensors: two kinds of native tensors (tensors that are
+ * managed by this backend) and migrant tensors.
+ *
+ * - NativeIOTensor - @c IOTensor managed by this backend ( in @c _base_reg )
+ * - NOTE The tensor it actually points to can be from another backend
+ * - NativeOwnTensor - @c basic::Tensor managed by this backend ( in @c _base_reg )
+ * - MigrantTensor - @c IPortableTensor managed by other backends
+ *
+ * @note @c _base_reg is used in implementation to reuse @c basic::StaticTensorManager
+ *
+ */
+class TensorRegistry : public ITensorRegistry
+{
+public:
+ TensorRegistry() : _base_reg{new basic::TensorRegistry} {}
+
+ ITensor *getITensor(const ir::OperandIndex &ind) override
+ {
+ auto base_tensor = _base_reg->getITensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ ITensor *getNativeITensor(const ir::OperandIndex &ind) override
+ {
+ auto base_tensor = _base_reg->getNativeITensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ IPortableTensor *getPortableTensor(const ir::OperandIndex &ind)
+ {
+ auto base_tensor = _base_reg->getPortableTensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ IPortableTensor *getNativeTensor(const ir::OperandIndex &ind)
+ {
+ auto base_tensor = _base_reg->getNativeTensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ Tensor *getNativeOwnTensor(const ir::OperandIndex &ind)
+ {
+ return _base_reg->getNativeTensor(ind);
+ }
+
+ IOTensor *getNativeIOTensor(const ir::OperandIndex &ind)
+ {
+ auto tensor = _native_io_tensors.find(ind);
+ if (tensor != _native_io_tensors.end())
+ return tensor->second.get();
+ return nullptr;
+ }
+
+ bool setMigrantTensor(const ir::OperandIndex &ind, IPortableTensor *tensor) override
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _base_reg->setMigrantTensor(ind, tensor);
+ return true;
+ }
+
+ void setNativeOwnTensor(ir::OperandIndex ind, std::unique_ptr<Tensor> &&tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _base_reg->setNativeTensor(ind, std::move(tensor));
+ }
+
+ void setNativeIOTensor(ir::OperandIndex ind, std::unique_ptr<IOTensor> &&tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _native_io_tensors[ind] = std::move(tensor);
+ }
+
+ const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors()
+ {
+ return _native_io_tensors;
+ }
+ std::shared_ptr<basic::TensorRegistry> base_reg() { return _base_reg; }
+
+private:
+ std::shared_ptr<basic::TensorRegistry> _base_reg;
+ ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors;
+};
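+
+ // Registration sketch for the NativeIOTensor path; the index and tensor are assumed to come from
+ // the caller and the index must not be registered yet. Afterwards the IOTensor is reachable
+ // through every getter that falls back to getNativeIOTensor().
+ inline void registerIOTensorExample(TensorRegistry &reg, const ir::OperandIndex &ind,
+                                     std::unique_ptr<IOTensor> io_tensor)
+ {
+   reg.setNativeIOTensor(ind, std::move(io_tensor));
+   assert(reg.getITensor(ind) == reg.getNativeIOTensor(ind));
+ }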
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UserTensor.h"
+
+#include "util/Exceptions.h"
+#include "ir/DataType.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+size_t UserTensor::calcOffset(const ir::Coordinates &coords) const
+{
+ size_t rank = getShape().rank();
+ size_t offset = 0;
+ for (size_t i = 0; i < rank; ++i)
+ {
+ offset = offset * getShape().dim(i) + coords[i];
+ }
+ offset *= sizeOfDataType(data_type());
+ return offset;
+}
+
+bool UserTensor::applyShape(const ir::Shape &new_shape)
+{
+ // User tensors cannot be reallocated.
+ auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type());
+ if (total_size() < new_size)
+ throw InsufficientBufferSizeException{"User given buffer size is too small."};
+ setShape(new_shape);
+ return true;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__
+#define __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__
+
+#include "ir/OperandInfo.h"
+#include "backend/IPortableTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+/**
+ * @brief Tensor object that is for Input and Output tensors from the user.
+ *
+ * This class wraps a buffer that is allocated by the user, so it is responsible for neither
+ * allocation nor deallocation. All the model input/output tensors are wrapped with this class
+ * for execution.
+ *
+ */
+class UserTensor : public IPortableTensor
+{
+public:
+ UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size)
+ : IPortableTensor{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false}
+ {
+ }
+
+ UserTensor(const ir::OperandInfo &info, ir::Layout layout) : UserTensor{info, layout, nullptr, 0}
+ {
+ }
+
+public:
+ void setBuffer(uint8_t *buffer, size_t size)
+ {
+ _buffer = buffer;
+ _size = size;
+ }
+
+public:
+ uint8_t *buffer() const override { return _buffer; }
+ size_t total_size() const override { return _size; }
+ size_t calcOffset(const ir::Coordinates &coords) const override;
+ ir::Layout layout() const override { return _layout; }
+ ir::DataType data_type() const override { return _info.typeInfo().type(); }
+ bool is_dynamic() const override { return _dynamic; }
+ void set_dynamic() override { _dynamic = true; }
+ ir::Shape getShape() const override { return _info.shape(); }
+ void setShape(const ir::Shape &new_shape) override { _info.shape(new_shape); }
+ bool is_constant() const override { return false; }
+ bool applyShape(const ir::Shape &) override;
+
+private:
+ ir::Layout _layout;
+ uint8_t *_buffer;
+ size_t _size;
+ bool _dynamic;
+};
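+
+ // Minimal sketch of wrapping a caller-owned buffer; the parameters are assumed to describe that
+ // buffer. UserTensor never allocates or frees the memory it wraps, and applyShape() only
+ // succeeds while the new shape still fits in the wrapped size.
+ inline size_t wrapUserBuffer(const ir::OperandInfo &info, ir::Layout layout, uint8_t *data,
+                              size_t size)
+ {
+   UserTensor tensor{info, layout, data, size};
+   // Accessors read straight from the wrapped buffer; nothing is copied or owned
+   return tensor.total_size(); // equals the wrapped size
+ }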
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IfLayer.h"
+
+#include <backend/ITensor.h>
+#include "exec/ExecutorBase.h"
+#include "PermuteLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+IfLayer::IfLayer(backend::IPortableTensor *cond_tensor,
+ const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
+ const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
+ exec::ExecutorMap *executor_map,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
+ _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index},
+ _executor_map{executor_map}, _external_context{external_context}
+{
+ // At this point, executor_map may not have executors of then subg and else subg
+}
+
+void IfLayer::run()
+{
+ // Check condition
+ // If true
+ //   Set _input_tensors -> then-subg's inputs
+ //   Set outputs of then-subg -> _output_tensors
+ //   Run then-subg
+ // Else
+ //   Set _input_tensors -> else-subg's inputs
+ //   Set outputs of else-subg -> _output_tensors
+ //   Run else-subg
+
+ auto getResultCond = [](backend::IPortableTensor *tensor) -> bool {
+ bool ret = false;
+ tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
+ return ret;
+ };
+
+ exec::IExecutor *subg_exec = nullptr;
+ bool cond_result = getResultCond(_cond_tensor);
+ if (cond_result)
+ {
+ VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
+ subg_exec = _executor_map->at(_then_subg_index).get();
+ }
+ else
+ {
+ VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
+ subg_exec = _executor_map->at(_else_subg_index).get();
+ }
+
+ subg_exec->execute(_input_tensors, _output_tensors);
+ VERBOSE(If) << "Return from $" << (cond_result ? _then_subg_index : _else_subg_index)
+ << std::endl;
+}
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include <exec/IExecutor.h>
+#include "../ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+class IfLayer : public ::onert::exec::IFunction
+{
+public:
+ IfLayer(backend::IPortableTensor *cond_tensor,
+ const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
+ const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
+ exec::ExecutorMap *executor_map,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+public:
+ void run() override;
+
+private:
+ backend::IPortableTensor *_cond_tensor;
+ const std::vector<backend::IPortableTensor *> _input_tensors;
+ const std::vector<backend::IPortableTensor *> _output_tensors;
+ const ir::SubgraphIndex _then_subg_index;
+ const ir::SubgraphIndex _else_subg_index;
+ exec::ExecutorMap *_executor_map;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PermuteLayer.h"
+
+#include "exec/ShapeConverter.h"
+
+#include "ruy/context.h" // from @ruy
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+PermuteLayer::PermuteLayer(const std::vector<ITensor *> &src_tensors,
+ const std::vector<ITensor *> &dst_tensors,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _external_context{external_context}, _tasks_map{}
+{
+ assert(src_tensors.size() == dst_tensors.size());
+ _src_tensors = src_tensors;
+ _dst_tensors = dst_tensors;
+ _src_tensors_offsets.resize(src_tensors.size());
+ _dst_tensors_offsets.resize(dst_tensors.size());
+}
+
+void PermuteLayer::optimize()
+{
+ // Skip copies where the source and destination are the same tensor or either one is nullptr
+ auto src_it = _src_tensors.begin();
+ auto dst_it = _dst_tensors.begin();
+ auto src_offsets_it = _src_tensors_offsets.begin();
+ auto dst_offsets_it = _dst_tensors_offsets.begin();
+ while (src_it != _src_tensors.end())
+ {
+ if ((*src_it == *dst_it) || (*src_it == nullptr || *dst_it == nullptr))
+ {
+ src_it = _src_tensors.erase(src_it);
+ dst_it = _dst_tensors.erase(dst_it);
+ src_offsets_it = _src_tensors_offsets.erase(src_offsets_it);
+ dst_offsets_it = _dst_tensors_offsets.erase(dst_offsets_it);
+ }
+ else
+ {
+ auto src = *src_it;
+ auto dst = *dst_it;
+ src_offsets_it->resize(0);
+ dst_offsets_it->resize(0);
+ if (underlying_type(src->data_type()) != underlying_type(dst->data_type()))
+ throw std::runtime_error("data type does not match");
+ const auto permute_type = [&]() -> PermuteType {
+ if (src->getShape().rank() == 4 && src->layout() == ir::Layout::NHWC &&
+ dst->layout() == ir::Layout::NCHW)
+ {
+ return PermuteType::NHWC_TO_NCHW;
+ }
+ else if (src->getShape().rank() == 4 && src->layout() == ir::Layout::NCHW &&
+ dst->layout() == ir::Layout::NHWC)
+ {
+ return PermuteType::NCHW_TO_NHWC;
+ }
+ else
+ {
+ return PermuteType::COPY;
+ }
+ }();
+ auto fn = [&](backend::ITensor &src_tensor) {
+ dst->access([&](backend::ITensor &dst_tensor) {
+ // NOTE The buffers of both tensors can be nullptr in this step
+ const auto data_size = ir::sizeOfDataType(src_tensor.data_type());
+
+ if (permute_type == PermuteType::COPY)
+ {
+ if ((!src_tensor.has_padding() && !dst_tensor.has_padding()))
+ {
+ const auto num_elements = src_tensor.getShape().num_elements();
+ const int thread_count =
+ _external_context->ruy_context()->max_num_threads() < static_cast<int>(num_elements)
+ ? _external_context->ruy_context()->max_num_threads()
+ : num_elements;
+
+ std::vector<PermuteWorkerTask> tasks;
+ auto start = 0;
+ for (auto i = 0; i < thread_count; ++i)
+ {
+ int end = start + (num_elements - start) / (thread_count - i);
+ tasks.emplace_back(src_tensor.buffer(), dst_tensor.buffer(), start * data_size,
+ start * data_size, (end - start) * data_size);
+ start = end;
+ }
+ assert(tasks.size() >= 1);
+ _tasks_map[src] = std::move(tasks);
+ }
+ else
+ {
+ auto loop_shape = src_tensor.getShape();
+
+ auto copy_axis = loop_shape.rank() - 1;
+ copy_axis = copy_axis < 0 ? 1 : copy_axis;
+ const auto copy_len = loop_shape.dim(copy_axis) * data_size;
+ loop_shape.dim(copy_axis) = 1;
+
+ appendPermuteTasks(src, dst, loop_shape, copy_len);
+ }
+ }
+ else
+ {
+ assert(src_tensor.getShape().rank() == 4 &&
+ (permute_type == PermuteType::NHWC_TO_NCHW ||
+ permute_type == PermuteType::NCHW_TO_NHWC));
+ const auto loop_shape = src_tensor.getShape();
+ const auto copy_len = data_size;
+
+ appendPermuteTasks(src, dst, loop_shape, copy_len);
+ }
+ });
+ };
+ src->access(fn);
+ src_it++;
+ dst_it++;
+ src_offsets_it++;
+ dst_offsets_it++;
+ }
+ }
+}
+
+void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
+ const ir::Shape &loop_shape, size_t size)
+{
+ size_t distributed_dim = 0;
+ auto src_shape = src_tensor->getShape();
+ if (src_tensor->layout() == dst_tensor->layout())
+ {
+ for (int i = 1; i < src_shape.rank() - 1; ++i)
+ {
+ distributed_dim = src_shape.dim(distributed_dim) < src_shape.dim(i) ? i : distributed_dim;
+ }
+ }
+ const auto distributed_dim_val = src_shape.dim(distributed_dim);
+ const int thread_count =
+ _external_context->ruy_context()->max_num_threads() < static_cast<int>(distributed_dim_val)
+ ? _external_context->ruy_context()->max_num_threads()
+ : distributed_dim_val;
+ // NOTE Do not remove this assertion: exceeding the limit would degrade performance by creating
+ // new threads in the context's thread pool
+ assert(thread_count <= _external_context->ruy_context()->max_num_threads());
+
+ std::vector<PermuteWorkerTask> tasks;
+ int start = 0;
+ auto one_thread_loop_shape = loop_shape;
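+ // Split the distributed dimension as evenly as possible across threads. For example, with
+ // distributed_dim_val = 10 and thread_count = 4 the chunk sizes become 2, 2, 3 and 3.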
+ for (auto i = 0; i < thread_count; ++i)
+ {
+ ir::Coordinates start_coords(one_thread_loop_shape.rank());
+ start_coords.set(distributed_dim, start);
+ int end = start + (distributed_dim_val - start) / (thread_count - i);
+ one_thread_loop_shape.dim(distributed_dim) = end - start;
+ tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size);
+ start = end;
+ }
+ assert(tasks.size() >= 1);
+ _tasks_map[src_tensor] = std::move(tasks);
+}
+
+void PermuteLayer::runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer)
+{
+ assert(src->getShape().num_elements() * ir::sizeOfDataType(src->data_type()) <=
+ src->total_size());
+ std::vector<PermuteWorkerTask> &tasks = _tasks_map.at(src);
+ for (size_t i = 0; i < tasks.size(); ++i)
+ {
+ tasks.at(i).setBuffers(src->buffer(), dst_buffer);
+ }
+ assert(tasks.size() >= 1);
+ _external_context->ruy_context()->mutable_thread_pool()->Execute(tasks.size(), tasks.data());
+}
+
+void PermuteLayer::run()
+{
+ assert(_src_tensors.size() == _dst_tensors.size());
+ // PermuteLayer infers dynamic shape inside itself whenever run is called for the following
+ // reasons:
+ // 1. PermuteLayer has to access dynamic tensor manager for input/output tensors of other backends
+ // 2. Other controlflow operations (If/While) use this layer for copying tensors of other
+ //    subgraphs (with other backends)
+ // 3. This inferring code is placed here to avoid duplicated code caused by the above two
+ //    reasons
+
+ // check if output is not dynamic
+ for (size_t i = 0; i < _src_tensors.size(); ++i)
+ {
+ auto dst_tensor = _dst_tensors.at(i);
+ auto src_tensor = _src_tensors.at(i);
+ if (src_tensor->is_dynamic() || dst_tensor->is_dynamic())
+ {
+ // getting output shape
+ auto src_shape = src_tensor->getShape();
+
+ // set output shape and output buffer
+ ir::Shape new_shape =
+ exec::convertShape(src_shape, src_tensor->layout(), dst_tensor->layout());
+
+ try
+ {
+ if (!dst_tensor->applyShape(new_shape))
+ throw std::runtime_error{
+ "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
+ assert(dst_tensor->buffer() != nullptr);
+ }
+ catch (const std::out_of_range &e)
+ {
+ std::cerr << "Error: out_of_range in PermuteLayer: output's TensorManager does not support "
+ "dynamic tensor"
+ << '\n';
+ throw;
+ }
+ }
+ assert(exec::convertShape(src_tensor->getShape(), src_tensor->layout(), dst_tensor->layout()) ==
+ dst_tensor->getShape());
+ }
+ assert(_src_tensors.size() == _dst_tensors.size());
+ assert(_src_tensors.size() == _src_tensors_offsets.size());
+ assert(_dst_tensors.size() == _dst_tensors_offsets.size());
+ auto src_it = _src_tensors.begin();
+ auto dst_it = _dst_tensors.begin();
+ auto src_offsets_it = _src_tensors_offsets.begin();
+ auto dst_offsets_it = _dst_tensors_offsets.begin();
+ while (src_it != _src_tensors.end())
+ {
+ auto src = *src_it;
+ auto dst = *dst_it;
+ auto &src_offsets = *src_offsets_it;
+ auto &dst_offsets = *dst_offsets_it;
+
+ if (src->total_size() == 0)
+ {
+ assert(dst->total_size() == 0);
+ }
+ else
+ {
+ if (src != dst)
+ {
+ // Conditions to run permutation with multithreading
+ // 1. The tasks for multithreading were created
+ // 2. There is more than one task
+ // 3. Both tensors are not dynamic
+ if (_tasks_map.find(src) == _tasks_map.end() || _tasks_map.at(src).size() == 1 ||
+ src->is_dynamic() || dst->is_dynamic())
+ {
+ permute(src, dst, src->getShape().rank(), src_offsets, dst_offsets);
+ }
+ // If dst is a subtensor, we have to use clEnqueueMapBuffer instead of clEnqueueWriteBuffer
+ else if (dst->needMemoryMap() && !dst->is_subtensor())
+ {
+ if (!src->has_padding() && !dst->has_padding() && src->layout() == dst->layout())
+ {
+ // This is more effective than multi-threading
+ src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); });
+ }
+ else
+ {
+ // TODO Optimize this block in case of that padding size of dst is big.
+ _buffers_map[dst].reserve(dst->total_size());
+ auto dst_buffer = _buffers_map[dst].data();
+
+ src->access([&](backend::ITensor &) { runPermuteTasks(src, dst_buffer); });
+ dst->enqueueWriteBuffer(dst_buffer, false);
+ }
+ }
+ else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() &&
+ !dst->has_padding() && src->layout() == dst->layout())
+ {
+        // This is more efficient than multi-threading
+ assert(!dst->needMemoryMap());
+ dst->access([&](backend::ITensor &) { src->enqueueReadBuffer(dst->buffer(), true); });
+ }
+ else
+ {
+ auto fn = [&](backend::ITensor &) {
+ dst->access([&](backend::ITensor &) { runPermuteTasks(src, dst->buffer()); });
+ };
+ src->access(fn);
+ }
+ }
+ }
+ src_it++;
+ dst_it++;
+ src_offsets_it++;
+ dst_offsets_it++;
+ }
+}
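+
+// A minimal usage sketch (hypothetical tensors; external_context is assumed to be a
+// std::shared_ptr<ExternalContext>; see PermuteLayer.h for the actual interface):
+//
+//   std::vector<ITensor *> srcs{src_tensor};
+//   std::vector<ITensor *> dsts{dst_tensor};
+//   PermuteLayer permute{srcs, dsts, external_context};
+//   permute.optimize(); // presumably pre-builds the per-source worker tasks
+//   permute.run();      // copies/permutes each src buffer into the matching dst buffer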
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
+
+#include "exec/IPermuteFunction.h"
+#include "exec/IExecutor.h"
+#include "../ExternalContext.h"
+#include "ruy/thread_pool.h" // from @ruy
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+class PermuteLayer : public onert::exec::IPermuteFunction
+{
+public:
+ PermuteLayer(const std::vector<ITensor *> &src_tensors, const std::vector<ITensor *> &dst_tensors,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void optimize() override;
+
+ void run() override;
+
+private:
+ std::shared_ptr<ExternalContext> _external_context;
+
+private:
+ void appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
+ const ir::Shape &loop_shape, size_t size);
+
+ void runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer);
+
+ struct PermuteWorkerTask : ruy::Task
+ {
+ using Strides = ir::Coordinates;
+
+ PermuteWorkerTask(const ITensor &src_tensor, ITensor &dst_tensor,
+ const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size)
+ : _src_buffer{src_tensor.buffer()}, _dst_buffer{dst_tensor.buffer()},
+ _src_start_offset{src_tensor.calcOffset(start_coords)},
+ _dst_start_offset{dst_tensor.calcOffset(start_coords)}, _src_strides{}, _dst_strides{},
+ _loop_shape{loop_shape}, _size{size}, _src_layout{src_tensor.layout()},
+ _dst_layout{dst_tensor.layout()}, _is_permutation{true}
+ {
+ // Set strides
+ setStrides(src_tensor, &_src_strides);
+ setStrides(dst_tensor, &_dst_strides);
+
+ _is_permutation = (_src_layout != _dst_layout && loop_shape.rank() == 4);
+ }
+ // Constructor for a copy
+ PermuteWorkerTask(const uint8_t *src_buffer, uint8_t *dst_buffer, uint32_t src_start_offset,
+ uint32_t dst_start_offset, size_t size)
+ : _src_buffer{src_buffer}, _dst_buffer{dst_buffer}, _src_start_offset{src_start_offset},
+ _dst_start_offset{dst_start_offset}, _src_strides{0}, _dst_strides{0},
+ _loop_shape{1}, _size{size}, _src_layout{}, _dst_layout{}, _is_permutation{false}
+ {
+ // DO NOTHING
+ }
+ void setBuffers(const uint8_t *src_buffer, uint8_t *dst_buffer)
+ {
+ _src_buffer = src_buffer;
+ _dst_buffer = dst_buffer;
+ }
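+    // Visits every coordinate of _loop_shape and copies a contiguous block of _size bytes per
+    // step; when _is_permutation is set, the destination coordinate is first translated from
+    // the source layout to the destination layout before the offsets are accumulated.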
+ void Run() override
+ {
+ ShapeLoop(_loop_shape, [&](const onert::ir::Coordinates &coords) {
+ size_t src_offset = _src_start_offset;
+ size_t dst_offset = _dst_start_offset;
+ assert(static_cast<size_t>(_loop_shape.rank()) == coords.size());
+ ir::Coordinates dst_coords = coords;
+ if (_is_permutation)
+ {
+ dst_coords = ir::convertCoordinates(coords, _src_layout, _dst_layout);
+ }
+ for (auto i = 0; i < _loop_shape.rank(); ++i)
+ {
+ assert(coords[i] >= 0 && dst_coords[i] >= 0);
+ src_offset += coords[i] * _src_strides[i];
+ dst_offset += dst_coords[i] * _dst_strides[i];
+ }
+ memcpy(_dst_buffer + dst_offset, _src_buffer + src_offset, _size);
+ });
+ }
+
+ private:
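+    // The stride of dimension i is computed in bytes as the offset difference between the
+    // origin and the coordinate that steps dimension i by one, so calcOffset() can account
+    // for any padding in the tensor's layout.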
+ void setStrides(const ITensor &tensor, Strides *strides)
+ {
+ auto shape = tensor.getShape();
+ const size_t rank = shape.rank();
+ for (size_t i = 0; i < rank; ++i)
+ {
+ ir::Coordinates no_step(rank), one_step(rank);
+ one_step.set(i, 1);
+ if (shape.dim(i) > 1)
+ {
+ strides->set(i, tensor.calcOffset(one_step) - tensor.calcOffset(no_step));
+ }
+ else
+ {
+        // If the dimension value is 0 or 1, the stride of the dimension will not be used
+        // Do not call calcOffset() with a coordinate value greater than the dimension value
+ strides->set(i, 0);
+ }
+ assert((*strides)[i] >= 0);
+ }
+ }
+
+ private:
+ const uint8_t *_src_buffer;
+ uint8_t *_dst_buffer;
+ size_t _src_start_offset;
+ size_t _dst_start_offset;
+ Strides _src_strides;
+ Strides _dst_strides;
+ const ir::Shape _loop_shape;
+ const size_t _size;
+ const ir::Layout _src_layout;
+ const ir::Layout _dst_layout;
+ bool _is_permutation;
+ };
+ std::unordered_map<const ITensor *, std::vector<PermuteWorkerTask>> _tasks_map;
+};
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WhileLayer.h"
+
+#include <algorithm>
+#include <backend/ITensor.h>
+#include "exec/ExecutorBase.h"
+#include <misc/polymorphic_downcast.h>
+#include "PermuteLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
+ const ir::SubgraphIndex &cond_subg_index,
+ const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map,
+ basic::DynamicMemoryManager *dyn_memory_manager,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
+ _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executor_map{executor_map},
+ _dyn_memory_manager{dyn_memory_manager}, _external_context{external_context}
+{
+ // At this point, executor_map may not have executors of cond subg and body subg
+}
+
+void WhileLayer::run()
+{
+  // Copy "_input_tensors" -> "cond subg inputs"
+  // Run cond subg
+  // Loop while the output of cond subg is true
+  // // Copy "_input_tensors" -> "body subg inputs" in the first iteration, then copy "body subg
+  // // outputs" -> "body subg inputs" from the second iteration on
+  // // Run body subg
+  // // Copy "body subg outputs" -> "cond subg inputs"
+  // // Run cond subg
+  // If the loop body never runs, copy "_input_tensors" -> "_output_tensors"; otherwise copy
+  // "cond subg inputs" -> "_output_tensors"
+ auto cond_exec = _executor_map->at(_cond_subg_index).get();
+ auto body_exec = _executor_map->at(_body_subg_index).get();
+
+ // Need a temp tensor to hold the cond subgraph output
+ assert(cond_exec->getOutputTensors().size() == 1);
+ auto cond_output_tensor = [&]() {
+ auto cond_output = cond_exec->getOutputTensors().at(0);
+ auto tensor = std::make_unique<Tensor>(cond_output->orig_info(), cond_output->orig_layout(),
+ _dyn_memory_manager);
+ tensor->set_dynamic();
+ tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
+ return tensor;
+ }();
+
+ VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
+ cond_exec->execute(_input_tensors, {cond_output_tensor.get()});
+ VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
+
+ auto getResultCond = [](backend::ITensor *tensor) -> bool {
+ bool ret = false;
+ tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
+ return ret;
+ };
+
+ std::vector<ITensor *> op_inputs(_input_tensors.begin(), _input_tensors.end());
+ std::vector<ITensor *> op_outputs(_output_tensors.begin(), _output_tensors.end());
+  // Copy the body inputs straight to the op outputs when the loop body is never executed
+ if (!getResultCond(cond_output_tensor.get()))
+ {
+ PermuteLayer copy_body_inputs_to_op_outputs{op_inputs, op_outputs, _external_context};
+ copy_body_inputs_to_op_outputs.run();
+ return;
+ }
+
+  // Need some temp tensors to hold the body subgraph outputs
+ std::vector<std::unique_ptr<Tensor>> temp_outputs_o;
+ std::vector<IPortableTensor *> temp_outputs;
+ for (auto io_tensor : body_exec->getOutputTensors())
+ {
+ auto tensor = std::make_unique<Tensor>(io_tensor->orig_info(), io_tensor->orig_layout(),
+ _dyn_memory_manager);
+ tensor->set_dynamic();
+ tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
+ temp_outputs.push_back(tensor.get());
+ temp_outputs_o.push_back(std::move(tensor));
+ }
+
+ std::vector<ITensor *> body_outputs(temp_outputs.begin(), temp_outputs.end());
+ PermuteLayer copy_body_outputs_to_op_outputs{body_outputs, op_outputs, _external_context};
+
+ const auto body_execute_with_op_inputs = [&]() {
+ VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
+ body_exec->execute(_input_tensors, temp_outputs);
+ VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
+ };
+
+ const auto body_execute_with_body_outputs = [&]() {
+ VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
+ body_exec->execute(_output_tensors, temp_outputs);
+ VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
+ };
+
+ std::function<void()> body_execute = body_execute_with_op_inputs;
+ const auto cond_execute = [&]() {
+ VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
+ cond_exec->execute(_output_tensors, {cond_output_tensor.get()});
+ VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
+ };
+
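+  // NOTE From the second iteration on, "_output_tensors" carry the loop state: the body reads
+  // them as its inputs, and copy_body_outputs_to_op_outputs writes the next values back into
+  // them before cond runs again.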
+ // Loop while Cond subgraph's output is true
+ while (getResultCond(cond_output_tensor.get()))
+ {
+ body_execute();
+ copy_body_outputs_to_op_outputs.run();
+ cond_execute();
+ body_execute = body_execute_with_body_outputs;
+ }
+
+ // Clean-up the temp tensors
+ _dyn_memory_manager->deallocate(cond_output_tensor.get());
+ for (auto tensor : temp_outputs)
+ {
+ _dyn_memory_manager->deallocate(tensor);
+ }
+}
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include <exec/IExecutor.h>
+#include <exec/IFunction.h>
+#include <ir/OperandIndexSequence.h>
+#include <ir/Graph.h>
+#include "../ExternalContext.h"
+
+#include "backend/basic/MemoryManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+class WhileLayer : public ::onert::exec::IFunction
+{
+public:
+ WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
+ const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
+ exec::ExecutorMap *executor_map, basic::DynamicMemoryManager *dyn_memory_manager,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+public:
+ void run() override;
+
+private:
+ const ir::SubgraphIndex _cond_subg_index;
+ const ir::SubgraphIndex _body_subg_index;
+ const std::vector<backend::IPortableTensor *> _input_tensors;
+ const std::vector<backend::IPortableTensor *> _output_tensors;
+ exec::ExecutorMap *_executor_map;
+ basic::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__
-#define __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__
-
-#include "BackendContext.h"
-#include "Config.h"
-#include "ConstantInitializer.h"
-#include "KernelGenerator.h"
-#include "TensorBuilder.h"
-#include "Tensor.h"
-
-#include <backend/Backend.h>
-
-#include <memory>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class Backend : public ::onert::backend::Backend
-{
-public:
- Backend() : _config{std::make_shared<Config>()} {}
-
- std::shared_ptr<IConfig> config() const override { return _config; }
-
- std::unique_ptr<onert::backend::BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
- bool) const override
- {
- const auto &operands = graph.operands();
- auto context = std::make_unique<BackendContext>(this, &graph);
- // ControlFlow backend may not build tensors for itself because the backend's operation uses
- // tensors of other baceknd instead
- // But the backend builds tensors in case of that the controlflow operation may have constant
- // input or that consecutive controflow operations exist. We have to make them not to be built
- // later
- // 1. Constant input
- // These tensors cannot be dynamic tensor, so let's do it as follows:
- // - always skip copying
- // - if it is operation's input in child subgraph: register "use" as constant input of the
- // operations in child subgraph
- // - if it is child subgraph's output: register "use" as constant input of the operations
- // using it
- // 2. Consecutive controflow operation's intermediate tensor
- // These tensors can be dynamic tensor and this is complicated to support without copying. But
- // there is no such case until now, let's support it later
- // TODO Remove TensorBuilder and ConstantInitializer
- // TODO Support Consecutive controflow operation's intermediate tensor
- auto tr = std::make_shared<TensorRegistry>();
- auto tb = std::make_shared<TensorBuilder>(tr);
- context->tensor_registry = tr;
- context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr,
- context->external_context());
- return context;
- }
-
-private:
- std::shared_ptr<IConfig> _config;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "BackendContext.h"
-
-#include "KernelGenerator.h"
-#include "backend/cpu_common/BackendContextHelpers.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-void BackendContext::initConsts()
-{
- for (auto &op : operation_list())
- {
- constant_initializer->setLayout(op.layout);
- graph()->operations().at(op.index).accept(*constant_initializer);
- }
-
- for (auto ind : operand_list())
- {
- const auto &obj = graph()->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- }
-
- constant_initializer->run();
-}
-
-ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info)
-{
- auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
- ir::Remove::DUPLICATED;
- for (auto index : operand_list())
- {
- if (model_io.contains(index))
- continue;
- const auto &obj = graph()->operands().at(index);
- const auto frontend_layout = [&]() {
- if (obj.getUses().size() == 0)
- return ir::Layout::UNKNOWN;
- auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
- for (auto &operation_info : operation_list())
- {
- if (operation_info.index == use_op_ind)
- return operation_info.layout;
- }
- return ir::Layout::UNKNOWN;
- }();
- const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
- if (permute_factor.backend() != backend())
- continue;
- const auto backend_layout = permute_factor.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
-
- // TODO Get compiler options from compiler, and use it rather than getting it from Env
- if (util::getConfigString(util::config::EXECUTOR) == "Linear")
- {
- cpu_common::planTensors(*this, order, op_seqs, lower_info);
- }
- else
- {
- // For the executors that does not have fixed linear execution order:
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto ind : operand_list())
- {
- if (tensor_builder->isRegistered(ind))
- tensor_builder->notifyFirstUse(ind);
- }
- }
-
- tensor_builder->prepare();
-
- return tensor_registry.get();
-}
-
-FunctionMap BackendContext::genKernels(const std::vector<ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs)
-{
- FunctionMap ret;
-
- for (auto op_seq_ind : order)
- {
- const auto &op_seq = op_seqs.at(op_seq_ind);
- bool assigned = [&]() {
- for (auto op_info : operation_list())
- if (op_seq.exist(op_info.index))
- return true;
- return false;
- }();
- if (!assigned)
- continue;
- auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
- ret.emplace_back(op_seq_ind, std::move(fn_seq));
- }
-
- initConsts();
-
- // NOTE For memory optimization, we want to free some operand data
- for (auto ind : operand_list())
- {
- // TODO Remove const_cast
- auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
- obj.releaseData();
- }
-
- for (auto &it : ret)
- {
- auto &fn_seq = it.second;
- fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
- }
-
- return ret;
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_BACKEND_CONTEXT_H__
-#define __ONERT_BACKEND_CONTROLFLOW_BACKEND_CONTEXT_H__
-
-#include <backend/BackendContext.h>
-#include "TensorBuilder.h"
-#include "ConstantInitializer.h"
-#include "KernelGenerator.h"
-#include "ExternalContext.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class BackendContext : public onert::backend::BackendContext
-{
-public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
- std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
- std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
- std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}, _external_context(std::make_shared<ExternalContext>())
- {
- }
-
- ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info) override;
-
- FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs) override;
-
- std::shared_ptr<ExternalContext> external_context() { return _external_context; }
-
-private:
- void initConsts();
- void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
-
-public:
- // TODO Make it private
- std::shared_ptr<TensorBuilder> tensor_builder;
- std::shared_ptr<ConstantInitializer> constant_initializer;
- std::shared_ptr<KernelGenerator> kernel_gen;
-
-private:
- // NOTE ruy context has a thread pool, and when multiple ruy contexts are created,
- // the thread pool is also created in duplicate
- // TODO Create one ruy context for session
- std::shared_ptr<ExternalContext> _external_context;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_BACKEND_CONTEXT_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Config.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-std::string Config::ID = "controlflow";
-
-bool Config::initialize() { return true; }
-
-ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout frontend_layout)
-{
- return frontend_layout;
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__
-#define __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__
-
-#include <backend/IConfig.h>
-#include <memory>
-#include <util/ITimer.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class Config : public IConfig
-{
-public:
- static std::string ID;
- std::string id() override { return ID; }
- bool initialize() override;
- ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
- bool supportPermutation() override { return false; }
- bool supportDynamicTensor() override
- {
- // TODO Make this backend to support dynamic tensor or not to build non-constant tensor
- return true;
- }
- bool supportFP16() override { return false; }
-
- std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
-
-#include <backend/cpu_common/ConstantInitializer.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-using ConstantInitializer = cpu_common::ConstantInitializer;
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
-
-#include "TensorRegistry.h"
-#include "Tensor.h"
-
-#include <backend/cpu_common/DynamicTensorManager.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-using DynamicTensorManager = cpu_common::DynamicTensorManager;
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__
-#define __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__
-
-#include <util/ConfigSource.h>
-
-#include <ruy/context.h>
-#include <ruy/context_get_ctx.h>
-#include <ruy/ctx.h>
-#include <ruy/tune.h>
-
-namespace
-{
-const int kDefaultNumThreadpoolThreads = 1;
-}
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-// TODO Unify this with cpu::ExternalContext
-class ExternalContext
-{
-public:
- ExternalContext() : _ruy_context(std::make_unique<ruy::Context>())
- {
- setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
- initPerThreadState();
- }
-
- void setMaxNumThreads(int max_num_threads)
- {
- const int target_num_threads =
- max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
- _ruy_context->set_max_num_threads(target_num_threads);
- }
-
- ruy::Context *ruy_context() const { return _ruy_context.get(); }
-
-private:
- void initPerThreadState()
- {
- // Initialize per-thread state.
- const int thread_count = _ruy_context->max_num_threads();
- auto ctx = ruy::get_ctx(_ruy_context.get());
- ctx->EnsureThreadSpecificResources(thread_count);
- for (int i = 0; i < thread_count; i++)
- {
- ctx->GetThreadSpecificTuningResolver(i)->SetTuning(ctx->explicit_tuning());
- }
- }
-
-private:
- const std::unique_ptr<ruy::Context> _ruy_context;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "IOTensor.h"
-
-#include <assert.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-IOTensor::IOTensor(const ir::OperandInfo &info, ir::Layout layout)
- : IPortableTensor{info}, _orig_info{info}, _orig_layout{layout}
-{
- setUserTensor(nullptr, 0);
-}
-
-void IOTensor::setTensor(IPortableTensor *tensor)
-{
- assert(tensor);
- assert(tensor != this);
- // TODO Handle when layout was changed
- assert(tensor->layout() == _orig_layout); // Changing layout is not considered yet
- _user_tensor.reset();
- _tensor = tensor;
-}
-
-void IOTensor::setUserTensor(uint8_t *buffer, size_t size)
-{
- _user_tensor = std::make_unique<UserTensor>(_orig_info, _orig_layout, buffer, size);
- _tensor = _user_tensor.get();
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
-#define __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
-
-#include "backend/IPortableTensor.h"
-#include "UserTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-/**
- * @brief Tensor object that indirects to the tensor it is pointing to.
- *
- * A model I/O tensor could be two types.
- *
- * 1. @c UserTensor, if it is the primary graph
- * 2. Any other derivative of @c IPortableTensor from another backend, otherwise
- *
- * To support these, this object indirects everything to the actual tensor pointer.
- * Exceptionally if it is UserTensor, this class creates and manages it.
- */
-class IOTensor : public IPortableTensor
-{
-public:
- IOTensor(const ir::OperandInfo &info, ir::Layout layout);
-
-public:
- void setTensor(IPortableTensor *tensor);
- void setUserTensor(uint8_t *buffer, size_t size);
- ir::OperandInfo orig_info() const { return _orig_info; }
- ir::Layout orig_layout() const { return _orig_layout; }
-
-public:
- uint8_t *buffer() const override { return _tensor->buffer(); }
- size_t total_size() const override { return _tensor->total_size(); }
- size_t dimension(size_t index) const override { return _tensor->dimension(index); }
- size_t num_dimensions() const override { return _tensor->num_dimensions(); }
- size_t calcOffset(const ir::Coordinates &coords) const override
- {
- return _tensor->calcOffset(coords);
- }
- ir::Layout layout() const override { return _tensor->layout(); }
- ir::DataType data_type() const override { return _tensor->data_type(); }
- float data_scale() const override { return _tensor->data_scale(); }
- int32_t data_offset() const override { return _tensor->data_offset(); }
- bool is_dynamic() const override { return _is_dynamic || (_tensor && _tensor->is_dynamic()); }
- void set_dynamic() override { _is_dynamic = true; }
- ir::Shape getShape() const override { return _tensor->getShape(); }
- void setShape(const ir::Shape &shape) override
- {
- // Workaround for IPortableTensor holds _info as its member
- _info.shape(shape);
- _tensor->setShape(shape);
- }
- bool is_constant() const override { return _tensor->is_constant(); }
- bool applyShape(const ir::Shape &shape) override
- {
- // Workaround for IPortableTensor holds _info as its member
- _info.shape(shape);
- return _tensor->applyShape(shape);
- }
-
-private:
- const ir::OperandInfo _orig_info;
- const ir::Layout _orig_layout;
- bool _is_dynamic{false};
- IPortableTensor *_tensor{nullptr}; //< The actual tensor that is indirected
- std::unique_ptr<UserTensor> _user_tensor; //< If it is a user tensor, it is managed by this object
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "KernelGenerator.h"
-
-#include <backend/BackendContext.h>
-#include <util/Utils.h>
-#include "kernel/IfLayer.h"
-#include "kernel/WhileLayer.h"
-#include "kernel/PermuteLayer.h"
-#include "exec/ExecutorBase.h"
-#include "exec/FunctionSequence.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
- const std::shared_ptr<TensorRegistry> &tensor_reg,
- const std::shared_ptr<ExternalContext> &external_context)
- : _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg},
- _tensor_registries{}, _executor_map{nullptr}, _external_context{external_context}
-{
- UNUSED_RELEASE(_graph);
- UNUSED_RELEASE(_tensor_registries);
- UNUSED_RELEASE(_executor_map);
-}
-
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
-{
- assert(!_return_fn_seq);
- assert(_dyn_tensor_manager);
- assert(_tensor_reg);
-
- auto dyn_shape_inferer =
- std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
-
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
-
- // Prepare to handle dynamic tensors later
- auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
- {
- dyn_ctx->op_seq = &op_seq;
- dyn_ctx->operations = &_graph.operations();
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->dynamic_tensor_manager = _dyn_tensor_manager;
-
- _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
- }
-
- for (const auto &op_idx : op_seq.operations())
- {
- const auto &node = _graph.operations().at(op_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
- }
-}
-
-void KernelGenerator::visit(const ir::operation::If &node)
-{
- const auto then_subg_index = node.param().then_subg_index;
- const auto else_subg_index = node.param().else_subg_index;
-
- std::vector<backend::IPortableTensor *> input_tensors;
- for (const auto input_index : node.getInputs())
- {
- auto input_tensor = getPortableTensor(input_index);
- input_tensors.emplace_back(input_tensor);
- }
-
- std::vector<backend::IPortableTensor *> output_tensors;
- for (const auto output_index : node.getOutputs())
- {
- auto output_tensor = getPortableTensor(output_index);
- output_tensors.emplace_back(output_tensor);
- }
-
- // IfLayer just set ExecutorMap instead of then and else executor to avoid complexity of
- // creating executor recusively
- const auto cond_tensor = input_tensors.front();
- input_tensors.erase(input_tensors.begin());
- auto fn = std::make_unique<::onert::backend::controlflow::kernel::IfLayer>(
- cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executor_map,
- _external_context);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Permute &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- // Add PermuteLayer
- std::vector<ITensor *> output_tensors{getTensor(output_index)};
- std::vector<ITensor *> input_tensors{getTensor(input_index)};
-
- auto fn =
- std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, _external_context);
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::While &node)
-{
- const auto cond_subg_index = node.param().cond_subg_index;
- const auto body_subg_index = node.param().body_subg_index;
-
- // This op does not support input as a constant, because controlflow backend does not have
- // TensorBuilder
- std::vector<backend::IPortableTensor *> input_tensors;
- for (const auto input_index : node.getInputs())
- {
- auto input_tensor = getPortableTensor(input_index);
- input_tensors.emplace_back(input_tensor);
- }
-
- std::vector<backend::IPortableTensor *> output_tensors;
- for (const auto output_index : node.getOutputs())
- {
- auto output_tensor = getPortableTensor(output_index);
- output_tensors.emplace_back(output_tensor);
- }
-
- // WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of
- // creating executor recusively
- auto fn = std::make_unique<::onert::backend::controlflow::kernel::WhileLayer>(
- input_tensors, output_tensors, cond_subg_index, body_subg_index, _executor_map,
- _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
-
- _return_fn = std::move(fn);
-}
-
-backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index)
-{
- // get Tensor from all tensor registries (for Permute op)
- auto ret = _tensor_registries.getITensor(index);
- assert(ret != nullptr);
- return ret;
-}
-
-backend::IPortableTensor *KernelGenerator::getPortableTensor(const ir::OperandIndex &index)
-{
- auto ret = _tensor_reg->getPortableTensor(index);
- assert(ret != nullptr);
- return ret;
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
-
-#include <exec/IExecutor.h>
-#include "ExternalContext.h"
-#include <ir/Graph.h>
-#include "TensorBuilder.h"
-#include "compiler/TensorRegistries.h"
-#include "backend/cpu_common/KernelGeneratorBase.h"
-#include "TensorRegistry.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class KernelGenerator : public cpu_common::KernelGeneratorBase
-{
-public:
- KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
- const std::shared_ptr<TensorRegistry> &tensor_reg,
- const std::shared_ptr<ExternalContext> &external_context);
-
- void setTensorRegistries(const compiler::TensorRegistries &tensor_registries)
- {
- _tensor_registries = tensor_registries;
- }
- void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map)
- {
- // FIXME Using shared_ptr's raw pointer!
- _executor_map = executor_map.get();
- }
-
- void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::If &) override;
- void visit(const ir::operation::Permute &) override;
- void visit(const ir::operation::While &) override;
-
-private:
- backend::ITensor *getTensor(const ir::OperandIndex &index);
- backend::IPortableTensor *getPortableTensor(const ir::OperandIndex &index);
-
-private:
- const ir::Graph &_graph;
- DynamicTensorManager *_dyn_tensor_manager;
- std::shared_ptr<TensorRegistry> _tensor_reg;
- compiler::TensorRegistries _tensor_registries;
- exec::ExecutorMap *_executor_map;
- const std::shared_ptr<ExternalContext> _external_context;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
-#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
-
-#include <backend/cpu_common/Tensor.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-using Tensor = cpu_common::Tensor;
-using ExternalTensor = cpu_common::ExternalTensor;
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorBuilder.h"
-
-#include <util/logging.h>
-
-#include <cassert>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
- : _tensor_reg{tensor_reg},
- _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
- _static_tensor_mgr{
- new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
-{
- /* empty */
-}
-
-void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout)
-{
- _tensor_info_map.emplace(ind, info);
-
- _tensor_layout_map.insert({ind, backend_layout});
-
- if (info.isDynamic())
- {
- _dynamic_tensor_mgr->buildTensor(ind, info, _tensor_layout_map[ind]);
- }
- else
- {
- _static_tensor_mgr->buildTensor(ind, info, _tensor_layout_map[ind], info.isConstant());
- }
-}
-
-void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
-{
- // TODO Enhance the way of checking user tensors
- if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
- return;
-
- const auto tensor_info = _tensor_info_map.at(ind);
-
- if (!nativeOwnTensorAt(ind)->is_dynamic())
- {
- const auto size = tensor_info.total_size();
- _static_tensor_mgr->claimPlan(ind, size);
- }
-}
-
-void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
-{
- // TODO Enhance the way of checking user tensors
- if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
- return;
-
- if (!nativeOwnTensorAt(ind)->is_dynamic())
- {
- _static_tensor_mgr->releasePlan(ind);
- }
-}
-
-bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
-{
- // User tensors are not registered in _tensor_info_map but objects for them are exist
- // in the tensor registry.
- // TODO Enhance the way of checking user tensors
- if (_tensor_reg->getITensor(ind))
- return true;
- return _tensor_info_map.find(ind) != _tensor_info_map.end();
-}
-
-void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
-
-void TensorBuilder::allocate()
-{
- // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
- // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
-}
-
-DynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
-{
- return _dynamic_tensor_mgr.get();
-}
-
-cpu_common::Tensor *TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getNativeOwnTensor(ind);
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__
-
-#include <backend/cpu_common/StaticTensorManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
-#include <backend/cpu_common/Tensor.h>
-
-#include <ir/OperandIndexMap.h>
-
-#include <unordered_map>
-
-#include "DynamicTensorManager.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class TensorBuilder
-{
-public:
- TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
-
- /**
- * @brief Register tensor information to allocate on CPU backend
- * @param[in] ind Operand index
- * @param[in] info Operand information
- * @param[in] layout Operand data layout
- */
- void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout);
-
- void notifyFirstUse(const ir::OperandIndex &);
- void notifyLastUse(const ir::OperandIndex &);
-
- bool isRegistered(const ir::OperandIndex &) const;
-
- void prepare(void);
- void allocate();
- void postFunctionPrepare() { /* DO NOTHING */}
-
- DynamicTensorManager *dynamicTensorManager(void);
-
- /**
- * @brief Get tensor with a specific OperandIndex.
- * @param ind OperandIndex for the tensor. There must exist a tensor with this ind.
- * If not, program will crash with assert or exception.
- * @return operand::Tensor *
- */
- cpu_common::Tensor *nativeOwnTensorAt(const ir::OperandIndex &ind);
-
-private:
- const std::shared_ptr<TensorRegistry> _tensor_reg;
- std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
- std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
- ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
- ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
-#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
-
-#include "backend/cpu_common/TensorRegistry.h"
-#include "backend/ITensorRegistry.h"
-#include "Tensor.h"
-#include "IOTensor.h"
-#include <assert.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-/**
- * @brief Tensor registry class for controlflow backend
- *
- * This class contains three types of tensors. Two native tensors(tensors that are managed by this
- * backend) and the other is migrant tensor.
- *
- * - NativeIOTensor - @c IOTensor managed by this backend ( in @c _base_reg )
- * - NOTE The tensor it actually points to can be from another backend
- * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg )
- * - MigrantTensor - @c IPortableTensor managed by other backends
- *
- * @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager
- *
- */
-class TensorRegistry : public ITensorRegistry
-{
-public:
- TensorRegistry() : _base_reg{new cpu_common::TensorRegistry} {}
-
- ITensor *getITensor(const ir::OperandIndex &ind) override
- {
- auto base_tensor = _base_reg->getITensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeIOTensor(ind);
- }
-
- ITensor *getNativeITensor(const ir::OperandIndex &ind) override
- {
- auto base_tensor = _base_reg->getNativeITensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeIOTensor(ind);
- }
-
- IPortableTensor *getPortableTensor(const ir::OperandIndex &ind)
- {
- auto base_tensor = _base_reg->getPortableTensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeIOTensor(ind);
- }
-
- IPortableTensor *getNativeTensor(const ir::OperandIndex &ind)
- {
- auto base_tensor = _base_reg->getNativeTensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeIOTensor(ind);
- }
-
- Tensor *getNativeOwnTensor(const ir::OperandIndex &ind)
- {
- return _base_reg->getNativeTensor(ind);
- }
-
- IOTensor *getNativeIOTensor(const ir::OperandIndex &ind)
- {
- auto tensor = _native_io_tensors.find(ind);
- if (tensor != _native_io_tensors.end())
- return tensor->second.get();
- return nullptr;
- }
-
- bool setMigrantTensor(const ir::OperandIndex &ind, IPortableTensor *tensor) override
- {
- assert(tensor);
- assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _base_reg->setMigrantTensor(ind, tensor);
- return true;
- }
-
- void setNativeOwnTensor(ir::OperandIndex ind, std::unique_ptr<Tensor> &&tensor)
- {
- assert(tensor);
- assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _base_reg->setNativeTensor(ind, std::move(tensor));
- }
-
- void setNativeIOTensor(ir::OperandIndex ind, std::unique_ptr<IOTensor> &&tensor)
- {
- assert(tensor);
- assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _native_io_tensors[ind] = std::move(tensor);
- }
-
- const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors()
- {
- return _native_io_tensors;
- }
- std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; }
-
-private:
- std::shared_ptr<cpu_common::TensorRegistry> _base_reg;
- ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "UserTensor.h"
-
-#include "util/Exceptions.h"
-#include "ir/DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-size_t UserTensor::calcOffset(const ir::Coordinates &coords) const
-{
- size_t rank = num_dimensions();
- size_t offset = 0;
- for (size_t i = 0; i < rank; ++i)
- {
- offset = offset * dimension(i) + coords[i];
- }
- offset *= sizeOfDataType(data_type());
- return offset;
-}
-
-bool UserTensor::applyShape(const ir::Shape &new_shape)
-{
- // User tensors cannot be reallocated.
- auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type());
- if (total_size() < new_size)
- throw InsufficientBufferSizeException{"User given buffer size is too small."};
- setShape(new_shape);
- return true;
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__
-#define __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__
-
-#include "ir/OperandInfo.h"
-#include "backend/IPortableTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-/**
- * @brief Tensor object for the input and output tensors provided by the user.
- *
- * This class wraps a buffer allocated by the user, so it is responsible for neither allocation
- * nor deallocation. All model input/output tensors are wrapped with this class for execution.
- *
- */
-class UserTensor : public IPortableTensor
-{
-public:
- UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size)
- : IPortableTensor{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false}
- {
- }
-
- UserTensor(const ir::OperandInfo &info, ir::Layout layout) : UserTensor{info, layout, nullptr, 0}
- {
- }
-
-public:
- void setBuffer(uint8_t *buffer, size_t size)
- {
- _buffer = buffer;
- _size = size;
- }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
- size_t total_size() const override { return _size; }
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
- size_t calcOffset(const ir::Coordinates &coords) const override;
- ir::Layout layout() const override { return _layout; }
- ir::DataType data_type() const override { return _info.typeInfo().type(); }
- float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
- bool is_dynamic() const override { return _dynamic; }
- void set_dynamic() override { _dynamic = true; }
- ir::Shape getShape() const override { return _info.shape(); }
- void setShape(const ir::Shape &new_shape) override { _info.shape(new_shape); }
- bool is_constant() const override { return false; }
- bool applyShape(const ir::Shape &) override;
-
-private:
- ir::Layout _layout;
- uint8_t *_buffer;
- size_t _size;
- bool _dynamic;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "IfLayer.h"
-
-#include <backend/ITensor.h>
-#include "exec/ExecutorBase.h"
-#include "PermuteLayer.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-IfLayer::IfLayer(backend::IPortableTensor *cond_tensor,
- const std::vector<backend::IPortableTensor *> input_tensors,
- const std::vector<backend::IPortableTensor *> output_tensors,
- const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::ExecutorMap *executor_map,
- const std::shared_ptr<ExternalContext> &external_context)
- : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
- _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index},
- _executor_map{executor_map}, _external_context{external_context}
-{
- // At this point, executor_map may not have executors of then subg and else subg
-}
-
-void IfLayer::run()
-{
- // Check condition
- // // If true
- // // // Set _input_tensors -> then-subg's inputs
- // // // Set outputs of then-subg -> _output_tensors
- // // // Run then-subg
- // // Else
- // // // Set _input_tensors -> else-subg's inputs
- // // // Set outputs of else-subg -> _output_tensors
- // // // Run else-subg
-
- auto getResultCond = [](backend::IPortableTensor *tensor) -> bool {
- bool ret = false;
- tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
- return ret;
- };
-
- exec::IExecutor *subg_exec = nullptr;
- bool cond_result = getResultCond(_cond_tensor);
- if (cond_result)
- {
- VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
- subg_exec = _executor_map->at(_then_subg_index).get();
- }
- else
- {
- VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
- subg_exec = _executor_map->at(_else_subg_index).get();
- }
-
- subg_exec->execute(_input_tensors, _output_tensors);
- VERBOSE(If) << "Return from $" << (cond_result ? _then_subg_index : _else_subg_index)
- << std::endl;
-}
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include <exec/IExecutor.h>
-#include "../ExternalContext.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-class IfLayer : public ::onert::exec::IFunction
-{
-public:
- IfLayer(backend::IPortableTensor *cond_tensor,
- const std::vector<backend::IPortableTensor *> input_tensors,
- const std::vector<backend::IPortableTensor *> output_tensors,
- const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::ExecutorMap *executor_map,
- const std::shared_ptr<ExternalContext> &external_context);
-
-public:
- void run() override;
-
-private:
- backend::IPortableTensor *_cond_tensor;
- const std::vector<backend::IPortableTensor *> _input_tensors;
- const std::vector<backend::IPortableTensor *> _output_tensors;
- const ir::SubgraphIndex _then_subg_index;
- const ir::SubgraphIndex _else_subg_index;
- exec::ExecutorMap *_executor_map;
- const std::shared_ptr<ExternalContext> _external_context;
-};
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "PermuteLayer.h"
-
-#include "exec/ShapeConverter.h"
-
-#include "ruy/context.h" // from @ruy
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-PermuteLayer::PermuteLayer(const std::vector<ITensor *> &src_tensors,
- const std::vector<ITensor *> &dst_tensors,
- const std::shared_ptr<ExternalContext> &external_context)
- : _external_context{external_context}, _tasks_map{}
-{
- assert(src_tensors.size() == dst_tensors.size());
- _src_tensors = src_tensors;
- _dst_tensors = dst_tensors;
- _src_tensors_offsets.resize(src_tensors.size());
- _dst_tensors_offsets.resize(dst_tensors.size());
-}
-
-void PermuteLayer::optimize()
-{
- // Remove entries where no copy is needed (src == dst) or either tensor is nullptr
- auto src_it = _src_tensors.begin();
- auto dst_it = _dst_tensors.begin();
- auto src_offsets_it = _src_tensors_offsets.begin();
- auto dst_offsets_it = _dst_tensors_offsets.begin();
- while (src_it != _src_tensors.end())
- {
- if ((*src_it == *dst_it) || (*src_it == nullptr || *dst_it == nullptr))
- {
- src_it = _src_tensors.erase(src_it);
- dst_it = _dst_tensors.erase(dst_it);
- src_offsets_it = _src_tensors_offsets.erase(src_offsets_it);
- dst_offsets_it = _dst_tensors_offsets.erase(dst_offsets_it);
- }
- else
- {
- auto src = *src_it;
- auto dst = *dst_it;
- src_offsets_it->resize(0);
- dst_offsets_it->resize(0);
- if (underlying_type(src->data_type()) != underlying_type(dst->data_type()))
- throw std::runtime_error("data type does not match");
- const auto permute_type = [&]() -> PermuteType {
- if (src->num_dimensions() == 4 && src->layout() == ir::Layout::NHWC &&
- dst->layout() == ir::Layout::NCHW)
- {
- return PermuteType::NHWC_TO_NCHW;
- }
- else if (src->num_dimensions() == 4 && src->layout() == ir::Layout::NCHW &&
- dst->layout() == ir::Layout::NHWC)
- {
- return PermuteType::NCHW_TO_NHWC;
- }
- else
- {
- return PermuteType::COPY;
- }
- }();
- auto fn = [&](backend::ITensor &src_tensor) {
- dst->access([&](backend::ITensor &dst_tensor) {
- // NOTE The buffers of both tensors can still be nullptr at this step
- const auto data_size = ir::sizeOfDataType(src_tensor.data_type());
-
- if (permute_type == PermuteType::COPY)
- {
- if ((!src_tensor.has_padding() && !dst_tensor.has_padding()))
- {
- const auto num_elements = src_tensor.getShape().num_elements();
- const int thread_count = _external_context->ruy_context()->max_num_threads() <
- static_cast<int>(num_elements)
- ? _external_context->ruy_context()->max_num_threads()
- : num_elements;
-
- std::vector<PermuteWorkerTask> tasks;
- auto start = 0;
- for (auto i = 0; i < thread_count; ++i)
- {
- int end = start + (num_elements - start) / (thread_count - i);
- tasks.emplace_back(src_tensor.buffer(), dst_tensor.buffer(), start * data_size,
- start * data_size, (end - start) * data_size);
- start = end;
- }
- assert(tasks.size() >= 1);
- _tasks_map[src] = std::move(tasks);
- }
- else
- {
- auto loop_shape = src_tensor.getShape();
-
- auto copy_axis = loop_shape.rank() - 1;
- copy_axis = copy_axis < 0 ? 1 : copy_axis;
- const auto copy_len = loop_shape.dim(copy_axis) * data_size;
- loop_shape.dim(copy_axis) = 1;
-
- appendPermuteTasks(src, dst, loop_shape, copy_len);
- }
- }
- else
- {
- assert(src_tensor.num_dimensions() == 4 && (permute_type == PermuteType::NHWC_TO_NCHW ||
- permute_type == PermuteType::NCHW_TO_NHWC));
- const auto loop_shape = src_tensor.getShape();
- const auto copy_len = data_size;
-
- appendPermuteTasks(src, dst, loop_shape, copy_len);
- }
- });
- };
- src->access(fn);
- src_it++;
- dst_it++;
- src_offsets_it++;
- dst_offsets_it++;
- }
- }
-}
-
-void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
- const ir::Shape &loop_shape, size_t size)
-{
- size_t distributed_dim = 0;
- if (src_tensor->layout() == dst_tensor->layout())
- {
- for (size_t i = 1; i < src_tensor->num_dimensions() - 1; ++i)
- {
- distributed_dim =
- src_tensor->dimension(distributed_dim) < src_tensor->dimension(i) ? i : distributed_dim;
- }
- }
- const auto distributed_dim_val = src_tensor->dimension(distributed_dim);
- const int thread_count =
- _external_context->ruy_context()->max_num_threads() < static_cast<int>(distributed_dim_val)
- ? _external_context->ruy_context()->max_num_threads()
- : distributed_dim_val;
- // NOTE Do not remove this assertion. Exceeding max_num_threads would degrade performance by
- // forcing the context's thread pool to create new threads
- assert(thread_count <= _external_context->ruy_context()->max_num_threads());
-
- std::vector<PermuteWorkerTask> tasks;
- int start = 0;
- auto one_thread_loop_shape = loop_shape;
- for (auto i = 0; i < thread_count; ++i)
- {
- ir::Coordinates start_coords(one_thread_loop_shape.rank());
- start_coords.set(distributed_dim, start);
- int end = start + (distributed_dim_val - start) / (thread_count - i);
- one_thread_loop_shape.dim(distributed_dim) = end - start;
- tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size);
- start = end;
- }
- assert(tasks.size() >= 1);
- _tasks_map[src_tensor] = std::move(tasks);
-}
-
-void PermuteLayer::runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer)
-{
- assert(src->getShape().num_elements() * ir::sizeOfDataType(src->data_type()) <=
- src->total_size());
- std::vector<PermuteWorkerTask> &tasks = _tasks_map.at(src);
- for (size_t i = 0; i < tasks.size(); ++i)
- {
- tasks.at(i).setBuffers(src->buffer(), dst_buffer);
- }
- assert(tasks.size() >= 1);
- _external_context->ruy_context()->mutable_thread_pool()->Execute(tasks.size(), tasks.data());
-}
-
-void PermuteLayer::run()
-{
- assert(_src_tensors.size() == _dst_tensors.size());
- // PermuteLayer infers dynamic shapes by itself whenever run() is called, for the following
- // reasons:
- // 1. PermuteLayer has to access the dynamic tensor manager for input/output tensors of other
- //    backends
- // 2. Other controlflow operations (If/While) use this layer for copying tensors across
- //    subgraphs (possibly on other backends)
- // 3. The inference code is placed here to avoid duplicating it for the two reasons above
-
- // Infer and apply the destination shape when either tensor is dynamic
- for (size_t i = 0; i < _src_tensors.size(); ++i)
- {
- auto dst_tensor = _dst_tensors.at(i);
- auto src_tensor = _src_tensors.at(i);
- if (src_tensor->is_dynamic() || dst_tensor->is_dynamic())
- {
- // getting output shape
- auto src_shape = src_tensor->getShape();
-
- // set output shape and output buffer
- ir::Shape new_shape =
- exec::convertShape(src_shape, src_tensor->layout(), dst_tensor->layout());
-
- try
- {
- if (!dst_tensor->applyShape(new_shape))
- throw std::runtime_error{
- "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
- assert(dst_tensor->buffer() != nullptr);
- }
- catch (const std::out_of_range &e)
- {
- std::cerr << "Error: out_of_range in PermuteLayer: output's TensorManager does not support "
- "dynamic tensor"
- << '\n';
- throw;
- }
- }
- assert(exec::convertShape(src_tensor->getShape(), src_tensor->layout(), dst_tensor->layout()) ==
- dst_tensor->getShape());
- }
- assert(_src_tensors.size() == _dst_tensors.size());
- assert(_src_tensors.size() == _src_tensors_offsets.size());
- assert(_dst_tensors.size() == _dst_tensors_offsets.size());
- auto src_it = _src_tensors.begin();
- auto dst_it = _dst_tensors.begin();
- auto src_offsets_it = _src_tensors_offsets.begin();
- auto dst_offsets_it = _dst_tensors_offsets.begin();
- while (src_it != _src_tensors.end())
- {
- auto src = *src_it;
- auto dst = *dst_it;
- auto &src_offsets = *src_offsets_it;
- auto &dst_offsets = *dst_offsets_it;
-
- if (src->total_size() == 0)
- {
- assert(dst->total_size() == 0);
- }
- else
- {
- if (src != dst)
- {
- // Conditions to run the permutation with multithreading
- // 1. The tasks for multithreading were created
- // 2. More than one task exists
- // 3. Neither tensor is dynamic
- if (_tasks_map.find(src) == _tasks_map.end() || _tasks_map.at(src).size() == 1 ||
- src->is_dynamic() || dst->is_dynamic())
- {
- permute(src, dst, src->num_dimensions(), src_offsets, dst_offsets);
- }
- // If dst is a subtensor, we have to use clEnqueueMapBuffer instead of clEnqueueWriteBuffer
- else if (dst->needMemoryMap() && !dst->is_subtensor())
- {
- if (!src->has_padding() && !dst->has_padding() && src->layout() == dst->layout())
- {
- // This is more effective than multi-threading
- src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); });
- }
- else
- {
- // TODO Optimize this block for the case where dst has a large padding size.
- _buffers_map[dst].reserve(dst->total_size());
- auto dst_buffer = _buffers_map[dst].data();
-
- src->access([&](backend::ITensor &) { runPermuteTasks(src, dst_buffer); });
- dst->enqueueWriteBuffer(dst_buffer, false);
- }
- }
- else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() &&
- !dst->has_padding() && src->layout() == dst->layout())
- {
- // This is more effective than multi-threading
- assert(!dst->needMemoryMap());
- dst->access([&](backend::ITensor &) { src->enqueueReadBuffer(dst->buffer(), true); });
- }
- else
- {
- auto fn = [&](backend::ITensor &) {
- dst->access([&](backend::ITensor &) { runPermuteTasks(src, dst->buffer()); });
- };
- src->access(fn);
- }
- }
- }
- src_it++;
- dst_it++;
- src_offsets_it++;
- dst_offsets_it++;
- }
-}
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
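
The deleted optimize() and appendPermuteTasks() above both split the copy work into near-equal contiguous chunks, one per thread, using end = start + (remaining / threads_left). A small self-contained sketch of that partitioning scheme (hypothetical helper name, not part of the removed code):

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Split `total` units of work into at most `max_threads` contiguous ranges whose sizes
// differ by at most one: end = start + (total - start) / (threads left)
std::vector<std::pair<int64_t, int64_t>> splitEvenly(int64_t total, int max_threads)
{
  const int thread_count = max_threads < total ? max_threads : static_cast<int>(total);
  std::vector<std::pair<int64_t, int64_t>> ranges;
  int64_t start = 0;
  for (int i = 0; i < thread_count; ++i)
  {
    int64_t end = start + (total - start) / (thread_count - i);
    ranges.emplace_back(start, end);
    start = end;
  }
  return ranges;
}

int main()
{
  // 10 elements over 4 threads -> [0, 2) [2, 4) [4, 7) [7, 10)
  for (const auto &r : splitEvenly(10, 4))
    std::cout << "[" << r.first << ", " << r.second << ")\n";
}
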
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
-
-#include "exec/IPermuteFunction.h"
-#include "exec/IExecutor.h"
-#include "../ExternalContext.h"
-#include "ruy/thread_pool.h" // from @ruy
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-class PermuteLayer : public onert::exec::IPermuteFunction
-{
-public:
- PermuteLayer(const std::vector<ITensor *> &src_tensors, const std::vector<ITensor *> &dst_tensors,
- const std::shared_ptr<ExternalContext> &external_context);
-
- void optimize() override;
-
- void run() override;
-
-private:
- std::shared_ptr<ExternalContext> _external_context;
-
-private:
- void appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
- const ir::Shape &loop_shape, size_t size);
-
- void runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer);
-
- struct PermuteWorkerTask : ruy::Task
- {
- using Strides = ir::Coordinates;
-
- PermuteWorkerTask(const ITensor &src_tensor, ITensor &dst_tensor,
- const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size)
- : _src_buffer{src_tensor.buffer()}, _dst_buffer{dst_tensor.buffer()},
- _src_start_offset{src_tensor.calcOffset(start_coords)},
- _dst_start_offset{dst_tensor.calcOffset(start_coords)}, _src_strides{}, _dst_strides{},
- _loop_shape{loop_shape}, _size{size}, _src_layout{src_tensor.layout()},
- _dst_layout{dst_tensor.layout()}, _is_permutation{true}
- {
- // Set strides
- setStrides(src_tensor, &_src_strides);
- setStrides(dst_tensor, &_dst_strides);
-
- _is_permutation = (_src_layout != _dst_layout && loop_shape.rank() == 4);
- }
- // Constructor for a copy
- PermuteWorkerTask(const uint8_t *src_buffer, uint8_t *dst_buffer, uint32_t src_start_offset,
- uint32_t dst_start_offset, size_t size)
- : _src_buffer{src_buffer}, _dst_buffer{dst_buffer}, _src_start_offset{src_start_offset},
- _dst_start_offset{dst_start_offset}, _src_strides{0}, _dst_strides{0}, _loop_shape{1},
- _size{size}, _src_layout{}, _dst_layout{}, _is_permutation{false}
- {
- // DO NOTHING
- }
- void setBuffers(const uint8_t *src_buffer, uint8_t *dst_buffer)
- {
- _src_buffer = src_buffer;
- _dst_buffer = dst_buffer;
- }
- void Run() override
- {
- ShapeLoop(_loop_shape, [&](const onert::ir::Coordinates &coords) {
- size_t src_offset = _src_start_offset;
- size_t dst_offset = _dst_start_offset;
- assert(static_cast<size_t>(_loop_shape.rank()) == coords.size());
- ir::Coordinates dst_coords = coords;
- if (_is_permutation)
- {
- dst_coords = ir::convertCoordinates(coords, _src_layout, _dst_layout);
- }
- for (auto i = 0; i < _loop_shape.rank(); ++i)
- {
- assert(coords[i] >= 0 && dst_coords[i] >= 0);
- src_offset += coords[i] * _src_strides[i];
- dst_offset += dst_coords[i] * _dst_strides[i];
- }
- memcpy(_dst_buffer + dst_offset, _src_buffer + src_offset, _size);
- });
- }
-
- private:
- void setStrides(const ITensor &tensor, Strides *strides)
- {
- const size_t rank = tensor.num_dimensions();
- for (size_t i = 0; i < rank; ++i)
- {
- ir::Coordinates no_step(rank), one_step(rank);
- one_step.set(i, 1);
- if (tensor.dimension(i) > 1)
- {
- strides->set(i, tensor.calcOffset(one_step) - tensor.calcOffset(no_step));
- }
- else
- {
- // If the dimension value is 0 or 1, the stride of that dimension is never used
- // Do not call calcOffset() with a coordinate value greater than the dimension value
- strides->set(i, 0);
- }
- assert((*strides)[i] >= 0);
- }
- }
-
- private:
- const uint8_t *_src_buffer;
- uint8_t *_dst_buffer;
- size_t _src_start_offset;
- size_t _dst_start_offset;
- Strides _src_strides;
- Strides _dst_strides;
- const ir::Shape _loop_shape;
- const size_t _size;
- const ir::Layout _src_layout;
- const ir::Layout _dst_layout;
- bool _is_permutation;
- };
- std::unordered_map<const ITensor *, std::vector<PermuteWorkerTask>> _tasks_map;
-};
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
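
PermuteWorkerTask above derives per-dimension strides from calcOffset() differences and remaps coordinates between layouts before each memcpy. The sketch below shows the same idea for the padding-free NHWC-to-NCHW case with float data; the names and the fixed rank are illustrative, and the real task copies _size bytes per step and handles arbitrary strides:

#include <array>
#include <cstdint>

// Row-major strides (in elements) for a 4-D shape, analogous to what setStrides() derives
// from calcOffset() differences when there is no padding.
std::array<int64_t, 4> rowMajorStrides(const std::array<int64_t, 4> &dims)
{
  std::array<int64_t, 4> s{};
  s[3] = 1;
  for (int i = 2; i >= 0; --i)
    s[i] = s[i + 1] * dims[i + 1];
  return s;
}

// Copy an NHWC float buffer into an NCHW buffer element by element: remap the coordinate,
// then accumulate offsets through the strides, as PermuteWorkerTask::Run() does.
void nhwcToNchw(const float *src, float *dst, int64_t n, int64_t h, int64_t w, int64_t c)
{
  const auto src_strides = rowMajorStrides({n, h, w, c});
  const auto dst_strides = rowMajorStrides({n, c, h, w});
  for (int64_t in = 0; in < n; ++in)
    for (int64_t ih = 0; ih < h; ++ih)
      for (int64_t iw = 0; iw < w; ++iw)
        for (int64_t ic = 0; ic < c; ++ic)
        {
          const int64_t src_off = in * src_strides[0] + ih * src_strides[1] +
                                  iw * src_strides[2] + ic * src_strides[3];
          const int64_t dst_off = in * dst_strides[0] + ic * dst_strides[1] +
                                  ih * dst_strides[2] + iw * dst_strides[3];
          dst[dst_off] = src[src_off];
        }
}
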
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "WhileLayer.h"
-
-#include <algorithm>
-#include <backend/ITensor.h>
-#include "exec/ExecutorBase.h"
-#include <misc/polymorphic_downcast.h>
-#include "PermuteLayer.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
- const std::vector<backend::IPortableTensor *> output_tensors,
- const ir::SubgraphIndex &cond_subg_index,
- const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map,
- cpu_common::DynamicMemoryManager *dyn_memory_manager,
- const std::shared_ptr<ExternalContext> &external_context)
- : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
- _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executor_map{executor_map},
- _dyn_memory_manager{dyn_memory_manager}, _external_context{external_context}
-{
- // At this point, executor_map may not have executors of cond subg and body subg
-}
-
-void WhileLayer::run()
-{
- // Copy "_input_tensors" -> "cond subg inputs"
- // Run cond subg
- // Start a loop while the output of cond subg is true
- // // Copy "_input_tensors" -> "body subg inputs" in the first iteration, then copy
- // "body subg outputs" -> "body subg inputs" in subsequent iterations
- // // Run body subg
- // // Copy "body subg outputs" -> "cond subg inputs"
- // // Run cond subg
- // If the loop never runs, copy "_input_tensors" -> "_output_tensors"; otherwise copy
- // "cond subg inputs" -> "_output_tensors"
- auto cond_exec = _executor_map->at(_cond_subg_index).get();
- auto body_exec = _executor_map->at(_body_subg_index).get();
-
- // Need a temp tensor to hold the cond subgraph output
- assert(cond_exec->getOutputTensors().size() == 1);
- auto cond_output_tensor = [&]() {
- auto cond_output = cond_exec->getOutputTensors().at(0);
- auto tensor = std::make_unique<Tensor>(cond_output->orig_info(), cond_output->orig_layout(),
- _dyn_memory_manager);
- tensor->set_dynamic();
- tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
- return tensor;
- }();
-
- VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
- cond_exec->execute(_input_tensors, {cond_output_tensor.get()});
- VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
-
- auto getResultCond = [](backend::ITensor *tensor) -> bool {
- bool ret = false;
- tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
- return ret;
- };
-
- std::vector<ITensor *> op_inputs(_input_tensors.begin(), _input_tensors.end());
- std::vector<ITensor *> op_outputs(_output_tensors.begin(), _output_tensors.end());
- // Copying body inputs to outputs when the loop body is never executed
- if (!getResultCond(cond_output_tensor.get()))
- {
- PermuteLayer copy_body_inputs_to_op_outputs{op_inputs, op_outputs, _external_context};
- copy_body_inputs_to_op_outputs.run();
- return;
- }
-
- // Need some temp tensors to hold the body subgraph output
- std::vector<std::unique_ptr<Tensor>> temp_outputs_o;
- std::vector<IPortableTensor *> temp_outputs;
- for (auto io_tensor : body_exec->getOutputTensors())
- {
- auto tensor = std::make_unique<Tensor>(io_tensor->orig_info(), io_tensor->orig_layout(),
- _dyn_memory_manager);
- tensor->set_dynamic();
- tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
- temp_outputs.push_back(tensor.get());
- temp_outputs_o.push_back(std::move(tensor));
- }
-
- std::vector<ITensor *> body_outputs(temp_outputs.begin(), temp_outputs.end());
- PermuteLayer copy_body_outputs_to_op_outputs{body_outputs, op_outputs, _external_context};
-
- const auto body_execute_with_op_inputs = [&]() {
- VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
- body_exec->execute(_input_tensors, temp_outputs);
- VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
- };
-
- const auto body_execute_with_body_outputs = [&]() {
- VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
- body_exec->execute(_output_tensors, temp_outputs);
- VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
- };
-
- std::function<void()> body_execute = body_execute_with_op_inputs;
- const auto cond_execute = [&]() {
- VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
- cond_exec->execute(_output_tensors, {cond_output_tensor.get()});
- VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
- };
-
- // Loop while Cond subgraph's output is true
- while (getResultCond(cond_output_tensor.get()))
- {
- body_execute();
- copy_body_outputs_to_op_outputs.run();
- cond_execute();
- body_execute = body_execute_with_body_outputs;
- }
-
- // Clean-up the temp tensors
- _dyn_memory_manager->deallocate(cond_output_tensor.get());
- for (auto tensor : temp_outputs)
- {
- _dyn_memory_manager->deallocate(tensor);
- }
-}
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
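
The control flow of the deleted WhileLayer::run() reduces to: evaluate the cond subgraph on the inputs, copy the inputs straight to the outputs if it is false, and otherwise alternate body and cond until cond turns false. A compact sketch of that dataflow with plain value vectors standing in for executors and tensors (illustrative only, not the removed implementation):

#include <functional>
#include <iostream>
#include <vector>

using Tensors = std::vector<int>;

// cond and body stand in for the cond/body subgraph executors.
Tensors runWhile(const Tensors &inputs, const std::function<bool(const Tensors &)> &cond,
                 const std::function<Tensors(const Tensors &)> &body)
{
  if (!cond(inputs))
    return inputs;           // Loop body never runs: outputs are a copy of the inputs
  Tensors outputs = inputs;  // First iteration consumes the op inputs
  do
  {
    outputs = body(outputs); // "body subg outputs" feed the next iteration and the op outputs
  } while (cond(outputs));   // Re-evaluate the cond subgraph on the current outputs
  return outputs;
}

int main()
{
  // Count up to 5: body increments, cond checks the bound.
  auto out = runWhile({0}, [](const Tensors &t) { return t[0] < 5; },
                      [](const Tensors &t) { return Tensors{t[0] + 1}; });
  std::cout << out[0] << std::endl; // 5
}
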
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include <exec/IExecutor.h>
-#include <exec/IFunction.h>
-#include <ir/OperandIndexSequence.h>
-#include <ir/Graph.h>
-#include "../ExternalContext.h"
-
-#include "backend/cpu_common/MemoryManager.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-class WhileLayer : public ::onert::exec::IFunction
-{
-public:
- WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
- const std::vector<backend::IPortableTensor *> output_tensors,
- const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
- exec::ExecutorMap *executor_map, cpu_common::DynamicMemoryManager *dyn_memory_manager,
- const std::shared_ptr<ExternalContext> &external_context);
-
-public:
- void run() override;
-
-private:
- const ir::SubgraphIndex _cond_subg_index;
- const ir::SubgraphIndex _body_subg_index;
- const std::vector<backend::IPortableTensor *> _input_tensors;
- const std::vector<backend::IPortableTensor *> _output_tensors;
- exec::ExecutorMap *_executor_map;
- cpu_common::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors
- const std::shared_ptr<ExternalContext> _external_context;
-};
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/Allocator.h"
-
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-Allocator::Allocator(uint32_t capacity)
-{
- _base = std::make_unique<uint8_t[]>(capacity);
-
- VERBOSE(ALLOC) << "allocation capacity: " << capacity << std::endl;
- VERBOSE(ALLOC) << "base pointer: " << static_cast<void *>(_base.get()) << std::endl;
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/BackendContextHelpers.h"
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/ConstantInitializer.h"
-#include "backend/cpu_common/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : ConstantInitializerBase{operands}, _tensor_reg{tensor_reg}
-{
- // DO NOTHING
-}
-
-void ConstantInitializer::registerDefaultInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- registerExternalInitializer(index, obj);
-}
-
-void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- // Only for CONSTANT operands
- // TODO Add a check for whether the tensor has been allocated
- if (!obj.isConstant())
- return;
-
- _init_map[index] = [](const onert::ir::Operand &model_obj, onert::backend::ITensor &itensor) {
- auto data = model_obj.shareData();
- assert(data && data->base());
- ExternalTensor &tensor = dynamic_cast<ExternalTensor &>(itensor);
- tensor.setData(data);
- };
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/ConstantInitializerBase.h"
-
-#include <Half.h>
-
-using float16 = Half;
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-void ConstantInitializerBase::registerCopyInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- // Only for CONSTANT operands
- // TODO Add a check for whether the tensor has been allocated
- if (!obj.isConstant())
- return;
-
- const auto type = obj.typeInfo().type();
- using ir::DataType;
-
- switch (type)
- {
- case DataType::FLOAT32:
- _init_map[index] = copyInit<float>;
- break;
- case DataType::INT32:
- _init_map[index] = copyInit<int32_t>;
- break;
- case DataType::UINT32:
- _init_map[index] = copyInit<uint32_t>;
- break;
- case DataType::BOOL8:
- case DataType::QUANT_UINT8_ASYMM:
- _init_map[index] = copyInit<uint8_t>;
- break;
- case DataType::QUANT_INT8_SYMM:
- case DataType::QUANT_INT8_ASYMM:
- _init_map[index] = copyInit<int8_t>;
- break;
- case DataType::FLOAT16:
- _init_map[index] = copyInit<float16>;
- break;
- case DataType::INT64:
- _init_map[index] = copyInit<int64_t>;
- break;
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
-}
-
-void ConstantInitializerBase::registerPermuteInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- // Only for CONSTANT operands
- // TODO Add a check for whether the tensor has been allocated
- if (!obj.isConstant())
- return;
-
- const auto type = obj.typeInfo().type();
- using ir::DataType;
- using namespace std::placeholders;
-
- switch (type)
- {
- case DataType::FLOAT32:
- _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_layout);
- break;
- case DataType::INT32:
- _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_layout);
- break;
- case DataType::UINT32:
- _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_layout);
- break;
- case DataType::BOOL8:
- case DataType::QUANT_UINT8_ASYMM:
- _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_layout);
- break;
- case DataType::QUANT_INT8_SYMM:
- case DataType::QUANT_INT8_ASYMM:
- _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_layout);
- break;
- case DataType::FLOAT16:
- _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_layout);
- break;
- case DataType::INT64:
- _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_layout);
- break;
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/DynamicTensorManager.h"
-
-#include "util/logging.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> ®)
- : _dynamic_mem_mgr{new DynamicMemoryManager()}, _tensors{reg}
-{
- // DO NOTHING
-}
-
-void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout)
-{
- assert(_tensors->getNativeTensor(ind) == nullptr);
- auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, _dynamic_mem_mgr.get());
- _tensors->setNativeTensor(ind, std::move(tensor));
-}
-
-void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, backend::ITensor *tensor)
-{
- _dealloc_tensor_map[op_ind].emplace(tensor);
-}
-
-void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
-{
- auto find = _dealloc_tensor_map.find(op_ind);
- if (find == _dealloc_tensor_map.end())
- return;
-
- auto &input_set = find->second;
- for (auto *tensor : input_set)
- {
- if (!tensor->is_dynamic())
- continue;
-
- _dynamic_mem_mgr->deallocate(tensor);
-
- auto *cpu_tensor = nnfw::misc::polymorphic_downcast<cpu_common::Tensor *>(tensor);
- cpu_tensor->resetBuffer();
-
- VERBOSE(DynamicTensorManager) << "Deallocating tensor " << (void *)cpu_tensor
- << " (input of op_ind: " << op_ind.value() << ")" << std::endl;
- }
-}
-
-const ITensor *DynamicTensorManager::getRawITensor(ir::OperandIndex ind)
-{
- auto ptr = _tensors->getITensor(ind);
- assert(ptr);
- return ptr;
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <backend/cpu_common/MemoryManager.h>
-
-#include <cassert>
-
-#include "MemoryPlannerFactory.h"
-#include "util/ConfigSource.h"
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()}
-{
- // DO NOTHING
-}
-
-MemoryManager::MemoryManager(const std::string planner_id)
- : _mem_planner{createMemoryPlanner(planner_id)}
-{
- // DO NOTHING
-}
-
-cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner()
-{
- auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER);
- return cpu_common::MemoryPlannerFactory::get().create(planner_id);
-}
-
-cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id)
-{
- return cpu_common::MemoryPlannerFactory::get().create(planner_id);
-}
-
-void MemoryManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- _mem_planner->claim(ind, size);
-}
-
-void MemoryManager::releasePlan(const ir::OperandIndex &ind) { _mem_planner->release(ind); }
-
-void MemoryManager::allocate(void)
-{
- _mem_alloc = std::make_shared<cpu_common::Allocator>(_mem_planner->capacity());
- assert(_mem_alloc->base());
-}
-
-uint8_t *MemoryManager::getBuffer(const ir::OperandIndex &ind) const
-{
- assert(_mem_planner->memory_plans().find(ind) != _mem_planner->memory_plans().end());
- const auto &mem_blk = _mem_planner->memory_plans().at(ind);
- return _mem_alloc->base() + mem_blk.offset;
-}
-
-std::shared_ptr<cpu_common::Allocator> DynamicMemoryManager::allocate(const ITensor *tensor,
- uint32_t capacity)
-{
- auto find = _mem_alloc_map.find(tensor);
- if (find != _mem_alloc_map.end())
- throw std::runtime_error("Cannot allocate memory for a tensor. It was already allocated.");
-
- _mem_alloc_map[tensor] = std::make_shared<cpu_common::Allocator>(capacity);
- return _mem_alloc_map[tensor];
-}
-
-void DynamicMemoryManager::deallocate(const ITensor *tensor)
-{
- auto find = _mem_alloc_map.find(tensor);
- if (find == _mem_alloc_map.end())
- throw std::runtime_error("Cannot find Allocator for the requested index");
-
- find->second->release(); // explicitly erase memory
- _mem_alloc_map.erase(find); // remove tensor and alloc
-}
-
-void DynamicMemoryManager::deallocate(void)
-{
- for (auto &mem_alloc : _mem_alloc_map)
- {
- // Release memory buffer of mem_alloc
- mem_alloc.second->release();
- }
-
- _mem_alloc_map.clear();
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MemoryPlanner.h"
-#include "util/logging.h"
-#include <cassert>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-void BumpPlanner::claim(const ir::OperandIndex &ind, size_t size)
-{
- Block blk{_capacity, size};
- _mem_plans[ind] = blk;
- _capacity += size;
-
- VERBOSE(BP_PLANNER) << "CLAIM(#" << ind.value() << "): " << blk.offset << ", " << blk.size
- << std::endl;
-}
-
-void BumpPlanner::release(const ir::OperandIndex &ind)
-{
- VERBOSE(BP_PLANNER) << "RELEASE(#" << ind.value() << "): "
- << "NOTHING does" << std::endl;
-}
-
-// There are some assumptions for claiming memory (== making a reservation for memory).
-// 1. About _claim_table (std::map).
-//    - std::map is used so that entries are always sorted by key (base_offset), with the
-//      value being the OperandIndex.
-//    - claim() inserts a key/value pair into _claim_table and release() removes it.
-//    - _claim_table reflects the memory status at a certain point in time. Therefore,
-//      - If _claim_table holds an entry at an offset with a certain size at that point, the
-//        place at the offset has already been claimed (== it cannot be claimed now; a new
-//        place has to be found).
-//      - If _claim_table has no entry covering an offset at that point, the place at the
-//        offset can be claimed.
-// 2. In the loop over _claim_table, the current claimed_base_offset is always bigger than the
-//    previous one.
-void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size)
-{
- // Find the right position for claiming
- uint32_t next_offset = 0;
- for (auto &mem_claim : _claim_table)
- {
- auto claimed_base_offset = mem_claim.first;
- auto claimed_size = _mem_plans[mem_claim.second].size;
- if (next_offset + size <= claimed_base_offset)
- {
- break;
- }
- else
- {
- next_offset = claimed_base_offset + claimed_size;
- }
- }
-
- // Now next_offset is set to the proper offset
- _claim_table[next_offset] = ind;
- _mem_plans[ind] = {next_offset, size};
-
- VERBOSE(FF_PLANNER) << "claim(#" << ind.value() << "): [+" << next_offset << ", " << size << "sz]"
- << std::endl;
-
- if (_capacity < next_offset + size)
- {
- _capacity = next_offset + size;
- }
-}
-
-void FirstFitPlanner::release(const ir::OperandIndex &ind)
-{
- for (auto it = _claim_table.cbegin(); it != _claim_table.cend(); ++it)
- {
- if (it->second == ind)
- {
- uint32_t offset = it->first;
- uint32_t index = ind.value();
- uint32_t size = _mem_plans[ind].size;
-
- _claim_table.erase(it);
-
- VERBOSE(FF_PLANNER) << "release(#" << index << "): [+" << offset << ", " << size << "sz]"
- << std::endl;
- return;
- }
- }
- assert(!"Cannot release for given index. It has been not claimed or released already.");
-}
-
-WICPlanner::WICPlanner()
- : _initialized(false), _capacity(0), _mem_plans(), _live_operands(), _interference_graph(),
- _operands()
-{
- // DO NOTHING
-}
-
-void WICPlanner::claim(const ir::OperandIndex &ind, size_t size)
-{
- _operands.emplace(size, ind);
- _interference_graph[ind].insert(_interference_graph[ind].end(), _live_operands.cbegin(),
- _live_operands.cend());
- for (const auto &live_operand : _live_operands)
- {
- _interference_graph[live_operand].emplace_back(ind);
- }
- _live_operands.emplace(ind);
-
- VERBOSE(WIC_PLANNER) << "claim(#" << ind.value() << "): [" << size << "sz]" << std::endl;
-}
-
-void WICPlanner::release(const ir::OperandIndex &ind)
-{
- _live_operands.erase(ind);
- VERBOSE(WIC_PLANNER) << "release(#" << ind.value() << ")" << std::endl;
-}
-
-/*
- * Build memory plans using the liveness and size of operands
- * 1. Build an interference graph at claim()
- *   - Two operands interfere if their live ranges overlap
- * 2. Sort operands in descending order of size
- *   - Use std::multimap to sort operands
- * 3. Allocate a memory block for each sorted operand
- *   - Find a free memory block that does not overlap with interfering operands
- */
-void WICPlanner::buildMemoryPlans()
-{
- for (const auto &operand : _operands)
- {
- uint32_t size = operand.first;
- const ir::OperandIndex &ind = operand.second;
- VERBOSE(WIC_PLANNER) << "build_plan(#" << ind.value() << "): [" << size << "sz]" << std::endl;
-
- uint32_t next_offset = 0;
- if (_interference_graph.count(ind))
- {
- // Find interfered memory plans and sort them by offset
- std::multimap<uint32_t, uint32_t> interfered_plans;
- for (const auto &interference : _interference_graph[ind])
- {
- if (_mem_plans.count(interference))
- interfered_plans.emplace(_mem_plans[interference].offset, _mem_plans[interference].size);
- }
-
- // Find free memory block in first-fit manner
- for (const auto &interfered_plan : interfered_plans)
- {
- auto claimed_base_offset = interfered_plan.first;
- auto claimed_size = interfered_plan.second;
- VERBOSE(WIC_PLANNER) << "interfere : [+" << claimed_base_offset << ", " << claimed_size
- << "sz]" << std::endl;
- if (next_offset + size <= claimed_base_offset)
- {
- break;
- }
- else if (next_offset < claimed_base_offset + claimed_size)
- {
- next_offset = claimed_base_offset + claimed_size;
- }
- }
- }
- else
- {
- VERBOSE(WIC_PLANNER) << "No interference" << std::endl;
- }
-
- _mem_plans[ind] = {next_offset, size};
- VERBOSE(WIC_PLANNER) << "alloc(#" << ind.value() << "): [+" << next_offset << ", " << size
- << "sz]" << std::endl;
-
- if (_capacity < next_offset + size)
- {
- _capacity = next_offset + size;
- }
- }
- _initialized = true;
- _interference_graph.clear();
- _operands.clear();
-}
-
-WICPlanner::MemoryPlans &WICPlanner::memory_plans()
-{
- if (!_initialized)
- buildMemoryPlans();
- return _mem_plans;
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
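
FirstFitPlanner::claim() above scans the offset-sorted _claim_table and takes the first gap large enough for the requested size. A minimal standalone sketch of that first-fit scan, with the table mapping base_offset directly to block size instead of to an OperandIndex (hypothetical names, not part of the removed code); the printed offsets illustrate how a freed gap is reused:

#include <cstdint>
#include <iostream>
#include <map>

// Scan claimed blocks in offset order and take the first gap that can hold `size`.
uint32_t firstFitClaim(std::map<uint32_t, uint32_t> &claimed, uint32_t size)
{
  uint32_t next_offset = 0;
  for (const auto &blk : claimed)
  {
    if (next_offset + size <= blk.first)
      break;                              // The gap before this block is big enough
    next_offset = blk.first + blk.second; // Otherwise jump past the claimed block
  }
  claimed[next_offset] = size;
  return next_offset;
}

int main()
{
  std::map<uint32_t, uint32_t> claimed;
  std::cout << firstFitClaim(claimed, 10) << "\n"; // 0
  std::cout << firstFitClaim(claimed, 20) << "\n"; // 10
  claimed.erase(0);                                // release the first block
  std::cout << firstFitClaim(claimed, 5) << "\n";  // 0  (fits into the freed gap)
  std::cout << firstFitClaim(claimed, 8) << "\n";  // 30 (no gap large enough before the end)
}
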
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file       MemoryPlanner.h
- * @brief      This file contains Memory Planning related classes
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
-#define __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
-
-#include <map>
-#include <vector>
-#include <unordered_set>
-#include <memory>
-
-#include "backend/cpu_common/Allocator.h"
-#include "backend/cpu_common/IMemoryPlanner.h"
-#include "ir/OperandIndexMap.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-/**
- * @brief Class to plan memory by bump way
- */
-class BumpPlanner : public IMemoryPlanner
-{
-public:
- /**
- * @brief Claim memory for operand by bump way
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- void claim(const ir::OperandIndex &, size_t) override;
- /**
- * @brief Release memory for operand by bump way
- * @param[in] index The operand index
- */
- void release(const ir::OperandIndex &) override;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- uint32_t capacity() override { return _capacity; }
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- MemoryPlans &memory_plans() override { return _mem_plans; }
-
-private:
- uint32_t _capacity = 0;
- MemoryPlans _mem_plans;
-};
-
-/**
- * @brief Class to plan memory by firstfit way
- */
-class FirstFitPlanner : public IMemoryPlanner
-{
-public:
- /**
- * @brief Claim memory for operand by firstfit way
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- void claim(const ir::OperandIndex &, size_t) override;
- /**
- * @brief Release memory for operand by firstfit way
- * @param[in] index The operand index
- */
- void release(const ir::OperandIndex &) override;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- uint32_t capacity() override { return _capacity; }
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- MemoryPlans &memory_plans() override { return _mem_plans; }
-
-private:
- uint32_t _capacity = 0;
- MemoryPlans _mem_plans;
- // Use std::map because claim() assumes that _claim_table is sorted by uint32_t(base_offset)
- std::map<uint32_t, ir::OperandIndex> _claim_table;
-};
-
-/**
- * @brief Class to plan memory by Weighted Interval Color algorithm
- */
-class WICPlanner : public IMemoryPlanner
-{
-public:
- WICPlanner();
-
- /**
- * @brief Claim memory for operand by WIC algorithm
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- void claim(const ir::OperandIndex &, size_t) override;
- /**
- * @brief Release memory for operand by WIC algorithm
- * @param[in] index The operand index
- */
- void release(const ir::OperandIndex &) override;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- uint32_t capacity() override
- {
- if (!_initialized)
- buildMemoryPlans();
- return _capacity;
- }
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- MemoryPlans &memory_plans() override;
-
-private:
- void buildMemoryPlans();
-
- bool _initialized;
- uint32_t _capacity;
- MemoryPlans _mem_plans;
- std::unordered_set<ir::OperandIndex> _live_operands;
- ir::OperandIndexMap<std::vector<ir::OperandIndex>> _interference_graph;
- // Sort operands by descending order of size
- std::multimap<uint32_t, ir::OperandIndex, std::greater<uint32_t>> _operands;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "MemoryPlanner.h"
-#include "ir/Index.h"
-
-TEST(Allocator, allocate_test)
-{
- ::onert::backend::cpu_common::Allocator allocator(1024);
- ASSERT_NE(allocator.base(), nullptr);
-}
-
-TEST(BumpPlanner, claim_test)
-{
- ::onert::backend::cpu_common::BumpPlanner planner;
-
- auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
- onert::ir::OperandIndex mem_idx(index);
- planner.claim(mem_idx, size);
- auto mem_blk = planner.memory_plans()[mem_idx];
- ASSERT_EQ(mem_blk.offset, expected_offset);
- ASSERT_EQ(mem_blk.size, size);
- };
-
- claim(0, 10, 0);
- claim(1, 20, 10);
- claim(2, 30, 30);
-}
-
-TEST(FirstFitPlanner, claim_release_test)
-{
- ::onert::backend::cpu_common::FirstFitPlanner planner;
-
- auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
- onert::ir::OperandIndex mem_idx(index);
- planner.claim(mem_idx, size);
- auto mem_blk = planner.memory_plans()[mem_idx];
- ASSERT_EQ(mem_blk.offset, expected_offset);
- ASSERT_EQ(mem_blk.size, size);
- };
-
- auto release = [&planner](uint32_t index) {
- onert::ir::OperandIndex mem_idx(index);
- planner.release(mem_idx);
- };
-
- // 0 CLAIM - 10
- claim(0, 10, 0);
-
- // 1 CLAIM - 20
- claim(1, 20, 10);
-
- // 2 CLAIM - 30
- claim(2, 30, 30);
-
- // 0 RELEASE - 10
- release(0);
-
- // 3 CLAIM - 20
- claim(3, 20, 60);
-
- // 4 CLAIM - 5
- claim(4, 5, 0);
-
- // 5 CLAIM - 10
- claim(5, 10, 80);
-
- // 6 CLAIM - 5
- claim(6, 5, 5);
-
- // 2 RELEASE - 30
- release(2);
-
- // 7 CLAIM - 35
- claim(7, 35, 90);
-
- // 8 CLAIM - 10
- claim(8, 10, 30);
-
- // 4 RELEASE - 5
- release(4);
-
- // 9 CLAIM - 10
- claim(9, 10, 40);
-
- // 10 CLAIM - 10
- claim(10, 10, 50);
-
- // 6 RELEASE
- release(6);
-
- // 1 RELEASE
- release(1);
-
- // 8 RELEASE
- release(8);
-
- // 9 RELEASE
- release(9);
-
- // 10 RELEASE
- release(10);
-
- // 3 RELEASE
- release(3);
-
- // 5 RELEASE
- release(5);
-
- // 7 RELEASE
- release(7);
-}
-
-TEST(WICPlanner, claim_release_test)
-{
- ::onert::backend::cpu_common::WICPlanner planner;
-
- auto claim = [&planner](uint32_t index, size_t size) {
- onert::ir::OperandIndex mem_idx(index);
- planner.claim(mem_idx, size);
- };
-
- auto release = [&planner](uint32_t index) {
- onert::ir::OperandIndex mem_idx(index);
- planner.release(mem_idx);
- };
-
- auto verify = [&planner](uint32_t index, uint32_t size, uint32_t expected_offset) {
- onert::ir::OperandIndex mem_idx(index);
- auto mem_blk = planner.memory_plans()[mem_idx];
- ASSERT_EQ(mem_blk.offset, expected_offset);
- ASSERT_EQ(mem_blk.size, size);
- };
-
- auto capacity = [&planner](uint32_t expected_capacity) {
- auto actual_capacity = planner.capacity();
- ASSERT_EQ(actual_capacity, expected_capacity);
- };
-
- claim(0, 20);
- claim(1, 5);
- release(0);
- claim(2, 10);
- release(1);
- claim(3, 10);
- release(2);
- claim(4, 10);
- release(3);
- claim(5, 20);
- release(4);
- claim(6, 20);
- release(5);
- release(7);
-
- // VERIFY 0 - 0
- verify(0, 20, 0);
-
- // VERIFY 1 - 20
- verify(1, 5, 20);
-
- // VERIFY 2 - 0
- verify(2, 10, 0);
-
- // VERIFY 3 - 10
- verify(3, 10, 10);
-
- // VERIFY 4 - 20
- verify(4, 10, 20);
-
- // VERIFY 5 - 0
- verify(5, 20, 0);
-
- // VERIFY 6 - 20
- verify(6, 20, 20);
-
- // CAPACITY - 40
- capacity(40);
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MemoryPlannerFactory.h"
-
-#include "MemoryPlanner.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-MemoryPlannerFactory &MemoryPlannerFactory::get()
-{
- static MemoryPlannerFactory instance;
- return instance;
-}
-
-IMemoryPlanner *MemoryPlannerFactory::create(const std::string &key)
-{
- if (key == "FirstFit")
- {
- return new FirstFitPlanner;
- }
- else if (key == "Bump")
- {
- return new BumpPlanner;
- }
- else if (key == "WIC")
- {
- return new WICPlanner;
- }
- return new FirstFitPlanner; // Default Planner
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
-#define __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
-
-#include "backend/cpu_common/IMemoryPlanner.h"
-
-#include <string>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class MemoryPlannerFactory
-{
-public:
- static MemoryPlannerFactory &get();
-
-private:
- MemoryPlannerFactory() = default;
-
-public:
- IMemoryPlanner *create(const std::string &key);
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/StaticTensorManager.h"
-
-#include "backend/cpu_common/DynamicTensorManager.h"
-#include "backend/cpu_common/Tensor.h"
-#include <util/logging.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> ®,
- DynamicTensorManager *dynamic_tensor_manager)
- : _nonconst_mgr{new MemoryManager()}, _tensors{reg},
- _dynamic_tensor_manager{dynamic_tensor_manager}
-{
- // DO NOTHING
-}
-
-void StaticTensorManager::allocateNonconsts(void)
-{
- _nonconst_mgr->allocate();
-
- for (auto &pair : _tensors->native_tensors())
- {
- const auto &ind = pair.first;
- auto tensor = pair.second.get();
- if (!_as_constants[ind] && !tensor->is_dynamic())
- {
- auto *buffer = _nonconst_mgr->getBuffer(ind);
- tensor->setBuffer(buffer);
-
- VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer) << std::endl;
- }
- }
-}
-
-void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
-
-void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
- bool as_const)
-{
- assert(!_tensors->getNativeTensor(ind));
- if (as_const)
- {
- auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout);
- _tensors->setNativeTensor(ind, std::move(tensor));
- }
- else
- {
- auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout,
- _dynamic_tensor_manager->dynamic_mem_mgr().get());
- _tensors->setNativeTensor(ind, std::move(tensor));
- }
- _as_constants[ind] = as_const;
-}
-
-void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- assert(_tensors->getNativeTensor(ind));
-
- // This method is called only when a tensor has proper shape
- assert(!_tensors->getNativeTensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->claimPlan(ind, size);
-}
-
-void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
-{
- assert(_tensors->getNativeTensor(ind));
-
- // This method is called only when a tensor has proper shape
- assert(!_tensors->getNativeTensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->releasePlan(ind);
-}
-
-void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
-{
- for (const auto &it : _tensors->native_tensors())
- fn(it.first);
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/Tensor.h"
-
-#include "ir/DataType.h"
-#include "backend/cpu_common/MemoryManager.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-Tensor::~Tensor() {}
-
-size_t Tensor::calcOffset(const ir::Coordinates &coords) const
-{
- size_t rank = num_dimensions();
- rank = rank == 0 ? 1 : rank;
- size_t offset = 0;
- for (size_t i = 0; i < rank; ++i)
- {
- offset = offset * dimension(i) + coords[i];
- }
- offset *= sizeOfDataType(data_type());
- return offset;
-}
-
-void Tensor::setShape(const ir::Shape &new_shape) { _info.shape(new_shape); }
-
-bool Tensor::applyShape(const ir::Shape &new_shape)
-{
- bool previously_dynamic = is_dynamic();
-
- auto allocTensorMem = [&](bool overwrite = false) {
- auto capacity = total_size();
- auto alloc = _dynamic_mem_mgr->allocate(this, capacity);
-
- if (overwrite)
- overwriteBuffer(alloc);
- else
- setBuffer(alloc);
- };
-
- if (!previously_dynamic)
- {
- // TODO deallocate tensor->buffer()
- // issue is that staticTensorManager might have allocate this memory
- setShape(new_shape);
- set_dynamic();
- allocTensorMem(true);
- }
- else if (buffer() == nullptr)
- {
- setShape(new_shape);
- set_dynamic();
- allocTensorMem();
- }
- // when buffer was already allocated and new_shape requires different size
- else
- {
- auto previous_size = total_size();
- auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type());
- if (previous_size != new_size)
- {
- _dynamic_mem_mgr->deallocate(this);
-
- setShape(new_shape);
- set_dynamic();
- allocTensorMem(true);
- }
- else
- { // when buffer with same size was already allocated, shape could differ
- setShape(new_shape);
- }
- }
- return true;
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-// ExternalTensor
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-// `dynamic_cast` not working across library boundaries on NDK
-// With this as a key function, `dynamic_cast` works across dl
-ExternalTensor::~ExternalTensor() {}
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
#include <dlfcn.h>
#include "backend/Backend.h"
-#include "backend/controlflow/Backend.h"
-#include "backend/controlflow/Config.h"
+#include "backend/builtin/Backend.h"
+#include "backend/builtin/Config.h"
#include "backend/IConfig.h"
#include "util/logging.h"
#include "util/ConfigSource.h"
static const char *SHARED_LIB_EXT =
#if defined(__APPLE__) && defined(__MACH__)
- ".dylib";
+ ".dylib";
#else
- ".so";
+ ".so";
#endif
namespace onert
return object;
}
-BackendManager::BackendManager() { loadControlflowBackend(); }
+BackendManager::BackendManager() { loadBuiltinBackend(); }
-void BackendManager::loadControlflowBackend()
+void BackendManager::loadBuiltinBackend()
{
- auto backend_object = std::unique_ptr<backend::controlflow::Backend, backend_destroy_t>(
- new backend::controlflow::Backend, [](backend::Backend *backend) { delete backend; });
+ auto backend_object = std::unique_ptr<backend::builtin::Backend, backend_destroy_t>(
+ new backend::builtin::Backend, [](backend::Backend *backend) { delete backend; });
bool initialized = backend_object->config()->initialize(); // Call initialize here?
if (!initialized)
{
- throw std::runtime_error(backend::controlflow::Config::ID + " backend initialization failed");
+ throw std::runtime_error(backend::builtin::Config::ID + " backend initialization failed");
}
- _controlflow = backend_object.get(); // Save the controlflow backend implementation pointer
- assert(_controlflow);
+ _builtin = backend_object.get(); // Save the builtin backend implementation pointer
+ assert(_builtin);
_gen_map.emplace(backend_object->config()->id(), std::move(backend_object));
}
}
auto backend_object =
- std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy);
+ std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy);
bool initialized = backend_object->config()->initialize(); // Call initialize here?
if (!initialized)
{
}
// Save backend handle (avoid warning by handle lost without dlclose())
-
- // NOTE This is a workaround for clang-format3.9 (seems like it does not understand
- // "by-copy capture with an initializer"
- // clang-format off
auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{
- handle, [id = backend, filename = backend_so](void *h) {
- if (dlclose(h) == 0)
- {
- VERBOSE(BackendManager) << "Successfully unloaded '" << id << "'(" << filename << ")\n";
- }
- else
- {
- VERBOSE(BackendManager)
- << "Failed to unload backend '" << id << "'- " << dlerror() << "\n";
- }
- }};
-// clang-format on
-_handle_map.emplace(backend, std::move(u_handle));
+ handle, [id = backend, filename = backend_so](void *h) {
+ if (dlclose(h) == 0)
+ {
+ VERBOSE(BackendManager) << "Successfully unloaded '" << id << "'(" << filename << ")\n";
+ }
+ else
+ {
+ VERBOSE(BackendManager) << "Failed to unload backend '" << id << "'- " << dlerror() << "\n";
+ }
+ }};
+ _handle_map.emplace(backend, std::move(u_handle));
}
backend::Backend *BackendManager::get(const std::string &key)
return nullptr;
}
-const backend::controlflow::Backend *BackendManager::getControlflow() const { return _controlflow; }
+const backend::builtin::Backend *BackendManager::getBuiltin() const { return _builtin; }
} // namespace compiler
} // namespace onert
#include "compiler/Compiler.h"
-#include "ParamChecker.h"
#include "ExecutorFactory.h"
#include "ShapeValidator.h"
-#include "Fp32ToFp16Converter.h"
-#include <backend/controlflow/Config.h>
+#include <backend/builtin/Config.h>
#include "compiler/BackendManager.h"
#include "compiler/IScheduler.h"
#include "compiler/ManualScheduler.h"
#include "compiler/HEScheduler.h"
#include "compiler/StaticShapeInferer.h"
+#include "compiler/OperationLowerInfo.h"
#include "compiler/pass/ConstantOutputPass.h"
#include "compiler/pass/OddOutputPass.h"
#include "compiler/pass/PassRunner.h"
+#include "compiler/pass/UnusedOperandEliminationPass.h"
#include "exec/ExecTime.h"
-#include "ir/operation/LowerInfo.h"
#include "ir/verifier/Verifier.h"
#include "dumper/dot/DotDumper.h"
#include "compiler/Linear.h"
options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
- options.op_seq_max_node = util::getConfigInt(util::config::OP_SEQ_MAX_NODE);
options.executor = util::getConfigString(util::config::EXECUTOR);
options.he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
options.he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
options.disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
options.fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
-#ifdef RUY_PROFILER
- options.op_seq_max_node = 1;
-#endif
{
// Backend for all
auto key = static_cast<uint32_t>(std::stoi(key_str));
subgs.at(ir::SubgraphIndex{0})
- ->operations()
- .at(ir::OperationIndex{key}); // Check if exist, or this wil throw
+ ->operations()
+ .at(ir::OperationIndex{key}); // Check if it exists, or this will throw
ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val);
}
}
}
Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx)
- : _subgraphs{subgs}, _state{State::CREATED}
+ : _subgraphs{subgs}, _state{State::CREATED}
{
// Set default values for CompilerOptions
// All these default values should not be fetched from Env, when we stop supporting Android NN
{
// Set control flow backend for control flow operators
{
- auto &cfid = backend::controlflow::Config::ID;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = cfid;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = cfid;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = cfid;
+ auto &builtin_id = backend::builtin::Config::ID;
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
}
// FIXME This is a workaround for bcq operations, should remove it
}
{
- VERBOSE(Compiler) << std::boolalpha;
- VERBOSE(Compiler) << "==== Compiler Options ====" << std::endl;
+ VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
VERBOSE(Compiler) << "backend_list : "
<< nnfw::misc::join(_options.backend_list.begin(),
_options.backend_list.end(), "/")
<< std::endl;
VERBOSE(Compiler) << "trace_filepath : " << _options.trace_filepath << std::endl;
VERBOSE(Compiler) << "graph_dump_level : " << _options.graph_dump_level << std::endl;
- VERBOSE(Compiler) << "op_seq_max_node : " << _options.op_seq_max_node << std::endl;
VERBOSE(Compiler) << "executor : " << _options.executor << std::endl;
VERBOSE(Compiler) << "manual backend_for_all : "
<< _options.manual_scheduler_options.backend_for_all << std::endl;
VERBOSE(Compiler) << "he_scheduler : " << _options.he_scheduler << std::endl;
VERBOSE(Compiler) << "he_profiling_mode : " << _options.he_profiling_mode << std::endl;
VERBOSE(Compiler) << "disable_compile : " << _options.disable_compile << std::endl;
- VERBOSE(Compiler) << "fp16_enable : " << _options.fp16_enable << std::endl;
- VERBOSE(Compiler) << std::noboolalpha;
+ VERBOSE(Compiler) << "fp16_enable : " << _options.fp16_enable << std::endl
+ << std::noboolalpha;
}
_subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
// Mandatory passes
pass::PassRunner{}
- .append(std::make_unique<pass::ConstantOutputPass>(subg))
- .append(std::make_unique<pass::OddOutputPass>(subg))
- .run();
+ .append(std::make_unique<pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<pass::OddOutputPass>(subg))
+ .run();
+
+ // Optimizations
+ pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
});
/***************************************************
// Compilable check
// TODO: Support hybrid execution -
// execution between interpreter and compiled executor (including control flow)
- if (!checkCompilable())
+ if (_options.disable_compile)
{
_subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
// Lower: Assign backend
lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options);
- // Check backend(s) for subgraph support FP16
- bool backends_support_fp16 = true;
- auto &contexts = (*lowered_subgs[index]).backend_contexts();
- for (auto it = contexts.begin(); it != contexts.end(); it++)
- {
- // Controlflow backend is not for actual computaion of operations so it is an exception
- if (it->first->config()->id() != backend::controlflow::Config::ID)
- backends_support_fp16 &= it->first->config()->supportFP16();
- }
-
- if (_options.fp16_enable && backends_support_fp16)
- {
- // NOTE: the only acl_cl backend enables fp16 mode
- Fp32ToFp16Converter(*lowered_subgs[index]).run();
- }
-
subg.setSubgraphs(nullptr);
});
{
const auto primary_subg_idx = ir::SubgraphIndex{0};
StaticShapeInferer inferer(primary_subg_idx, lowered_subgs);
- lowered_subgs.at(primary_subg_idx)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- auto has_dynamic_tensor = inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
+ auto &lowered_subg = lowered_subgs.at(primary_subg_idx);
+ auto ordered_ops = lowered_subg->graph().topolSortOperations();
+ for (auto op_ind : ordered_ops)
+ {
+ const auto &op = lowered_subg->graph().operations().at(op_ind);
+ bool has_dynamic_tensor = inferer.infer(op);
+ lowered_subg->setHasDynamicTensor(op_ind, has_dynamic_tensor);
+ }
inferer.dump();
}
ir::OperationDumper dumper("Executor generation of Subgraph " +
std::to_string(subg_index.value()));
lowered_subg->graph().operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
+ [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
auto executor = std::unique_ptr<exec::IExecutor>{
- ExecutorFactory::get().create(std::move(lowered_subg), _options, executors)};
+ ExecutorFactory::get().create(std::move(lowered_subg), _options, executors)};
executor->setIndexedRanks(indexed_ranks);
executors->insert(std::make_pair(subg_index, std::move(executor)));
}
return executors;
}
-bool Compiler::checkCompilable()
-{
- // Disable compile phase
- // When ready to use interpreter backend, remove this config and use backend setting
- if (_options.disable_compile)
- {
- return false;
- }
-
- // TODO check unspecified operand shape
-
- // Check compilable parameter
- for (uint32_t i = 0; i < _subgraphs->count(); ++i)
- {
- auto graph = _subgraphs->at(ir::SubgraphIndex{i});
- ParamChecker paramChecker{graph};
- paramChecker();
- if (paramChecker.haveNoneConstParam())
- {
- return false;
- }
- }
-
- return true;
-}
-
} // namespace compiler
} // namespace onert
#include <deque>
#include <functional>
+#include "ir/OperationCloner.h"
#include "exec/ExecutionObservers.h"
#include "exec/LinearExecutor.h"
#include "exec/DataflowExecutor.h"
#include "compiler/ExecutionBuilder.h"
#include "exec/ExecTime.h"
#include "compiler/Linear.h"
+#include "compiler/BackendManager.h"
#include "backend/IPortableTensor.h"
-#include "backend/controlflow/Config.h"
-#include "backend/controlflow/KernelGenerator.h"
-#include "backend/controlflow/UserTensor.h"
-#include "backend/controlflow/TensorBuilder.h"
+#include "backend/builtin/Config.h"
+#include "backend/builtin/KernelGenerator.h"
+#include "backend/builtin/UserTensor.h"
+#include "backend/builtin/TensorBuilder.h"
#include "util/TracingCtx.h"
+#include "dumper/text/GraphDumper.h"
#include <memory>
public:
virtual ~SyncFunction() = default;
SyncFunction(std::unique_ptr<exec::IFunction> fn, const std::shared_ptr<backend::IConfig> config)
- : _fn{std::move(fn)}, _config{config}
+ : _fn{std::move(fn)}, _config{config}
{
assert(_fn);
assert(_config);
std::shared_ptr<backend::IConfig> _config;
};
+using DeallocList = std::vector<backend::ITensor *>;
+// Deallocates the listed dynamic tensors right after an operation finishes; used by the Linear Executor
+class DeallocFunction final : public exec::IFunction
+{
+public:
+ DeallocFunction(const DeallocList &tensors) : _dealloc_list{tensors} {}
+
+ void run() override
+ {
+ for (auto tensor : _dealloc_list)
+ {
+ if (!tensor->is_dynamic())
+ continue;
+ tensor->deallocBuffer();
+ }
+ }
+
+private:
+ DeallocList _dealloc_list;
+};
+
void initializeSubgraphIOTensors(compiler::LoweredGraph &lowered_graph,
+ const backend::BackendContexts &backend_contexts,
const ir::OperandIndexSequence &indices)
{
- // TODO Store controlflow backend in BackendContext
- std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
- for (const auto &e : lowered_graph.backend_contexts())
+ // TODO Store builtin backend in BackendContext
+ std::shared_ptr<backend::builtin::TensorRegistry> builtin_tensor_reg;
+ for (const auto &e : backend_contexts)
{
auto backend = e.first;
auto &context = e.second;
- if (backend->config()->id() == backend::controlflow::Config::ID)
+ if (backend->config()->id() == backend::builtin::Config::ID)
{
- cf_tensor_reg =
- std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
+ builtin_tensor_reg =
+ std::dynamic_pointer_cast<backend::builtin::TensorRegistry>(context->tensor_registry);
}
}
- assert(cf_tensor_reg);
+ assert(builtin_tensor_reg);
for (auto ind : indices)
{
const auto &operand = lowered_graph.graph().operands().at(ind);
- auto tensor = std::make_unique<backend::controlflow::IOTensor>(
- operand.info(),
- ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */
- );
+ auto tensor = std::make_unique<backend::builtin::IOTensor>(
+ operand.info(),
+ ir::Layout::NHWC /* FIXME find operation for this operand and use frontend_layout */
+ );
- // Add tensor to controlflow TensorRegistry.
- cf_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
+ // Add tensor to builtin TensorRegistry.
+ builtin_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
}
}
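+// Split the lowered graph into one partial graph per backend (based on each operand's and
+// operation's lower info) and create a BackendContext for every backend from its partial graph.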
+backend::BackendContexts createBackendContexts(compiler::LoweredGraph &lgraph, bool linear_executor)
+{
+ backend::BackendContexts contexts;
+ auto &backend_manager = compiler::BackendManager::get();
+
+ std::unordered_map<const backend::Backend *, backend::ContextData> context_data_map;
+
+ // Generate partial graphs for each backend
+ for (auto backend : backend_manager.getAll())
+ {
+ auto &data = context_data_map[backend];
+ auto graph = std::make_unique<ir::Graph>();
+ graph->setLayout(lgraph.graph().layout());
+ data.graph = std::move(graph);
+ }
+
+ auto &whole_graph = lgraph.graph();
+ // Separate operands into partial graphs
+ whole_graph.operands().iterate([&](const ir::OperandIndex &operand_ind, ir::Operand &operand) {
+ auto &operand_li = lgraph.lower_info().operand;
+ const auto &def_factors = operand_li.at(operand_ind).def_factors();
+ if (def_factors.size() == 0) // Ignore unused tensor
+ return;
+ const auto &def_factor = def_factors.getOnlyElement();
+ const auto backend = def_factor.backend();
+ auto &partial_graph = *context_data_map[backend].graph;
+ auto &operand_layouts = context_data_map[backend].operand_layouts;
+ assert(operand_layouts.find(operand_ind) == operand_layouts.end());
+ operand_layouts[operand_ind] = def_factor.layout();
+
+ // Copy the operand and insert it to the partial graph
+ auto new_operand = std::make_unique<ir::Operand>(operand);
+ new_operand->clearDefUse();
+ operand.releaseData(); // Deref data of LoweredGraph
+ auto new_operand_ind = partial_graph.addOperand(operand_ind, std::move(new_operand));
+ UNUSED_RELEASE(new_operand_ind);
+ assert(new_operand_ind == operand_ind);
+ });
+ // Separate operations into partial graphs
+ whole_graph.operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::Operation &operation) {
+ auto &op_li = lgraph.lower_info().operation;
+ auto backend = op_li.at(op_ind).backend();
+ auto &partial_graph = *context_data_map[backend].graph;
+ auto &external_operands = context_data_map[backend].external_operands;
+ auto &operand_layouts = context_data_map[backend].operand_layouts;
+
+ {
+ // Add missing operands (externals)
+ auto io_list = (operation.getInputs() + operation.getOutputs()) | ir::Remove::DUPLICATED |
+ ir::Remove::UNDEFINED;
+ for (auto operand_ind : io_list)
+ {
+ if (partial_graph.operands().exist(operand_ind))
+ continue;
+
+ // Copy the operand and insert it to the partial graph
+ const auto &operand = whole_graph.operands().at(operand_ind);
+ auto new_operand = std::make_unique<ir::Operand>(operand);
+ new_operand->clearDefUse();
+ auto new_operand_ind = partial_graph.addOperand(operand_ind, std::move(new_operand));
+ UNUSED_RELEASE(new_operand_ind);
+ assert(new_operand_ind == operand_ind);
+
+ auto layout =
+ lgraph.lower_info().operand.at(operand_ind).def_factors().getOnlyElement().layout();
+ assert(operand_layouts.find(operand_ind) == operand_layouts.end());
+ operand_layouts[operand_ind] = layout;
+ external_operands.add(operand_ind);
+ }
+
+ auto new_op_ind = partial_graph.addOperation(op_ind, clone(operation));
+ UNUSED_RELEASE(new_op_ind);
+ assert(new_op_ind == op_ind);
+ }
+ });
+
+ // Create contexts
+ auto whole_op_order = lgraph.graph().topolSortOperations();
+ for (auto &pair : context_data_map)
+ {
+ auto backend = pair.first;
+ auto &data = pair.second;
+ // Handle graph input/outputs or external tensors
+ data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (whole_graph.getInputs().contains(ind) || whole_graph.getOutputs().contains(ind))
+ data.external_operands.add(ind);
+ // Inputs are either "graph input" or "no def op and non-constant"
+ if (whole_graph.getInputs().contains(ind) ||
+ (!operand.getDef().valid() && !operand.isConstant()))
+ data.graph->addInput(ind);
+ // Outputs are either "graph output" or "no uses"
+ if (whole_graph.getOutputs().contains(ind) || operand.getUses().size() == 0)
+ data.graph->addOutput(ind);
+ });
+ dumper::text::dumpGraph(*data.graph);
+
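+ // Keep only this backend's operations, preserving the whole graph's topological order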
+ std::copy_if(whole_op_order.begin(), whole_op_order.end(), std::back_inserter(data.op_order),
+ [&](const auto &ind) { return data.graph->operations().exist(ind); });
+ data.is_linear_executor = linear_executor;
+ data.custom_kernel_builder = lgraph.graph().getKernelBuilder();
+ contexts.emplace(backend, backend->newContext(std::move(data)));
+ }
+ return contexts;
+}
+
} // namespace
} // namespace onert
return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
}
-void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph)
-{
- struct Entry
- {
- std::vector<backend::BackendContext::OperationInfo> operation_list;
- std::vector<ir::OperandIndex> operand_list;
- };
- std::unordered_map<const backend::Backend *, Entry> backend_assets;
-
- // Build lists for operations
- lowered_graph->op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
- auto &op_seq_li = lowered_graph->getLowerInfo()->op_seq;
- auto backend = op_seq_li.at(op_seq_index)->backend();
- for (auto &operation_idx : op_seq.operations())
- {
- backend_assets[backend].operation_list.emplace_back(operation_idx, op_seq.getLayout());
- }
- });
-
- // Build lists for operands
- lowered_graph->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
- const auto lower_info = lowered_graph->getLowerInfo(ind);
- for (auto factor : lower_info->def_factors())
- {
- auto backend = factor.backend();
- backend_assets[backend].operand_list.emplace_back(ind);
- }
- });
-
- for (auto &pair : backend_assets)
- {
- auto backend = pair.first;
- auto &arg = pair.second;
- lowered_graph->backend_contexts().at(backend)->initialize(arg.operation_list, arg.operand_list);
- }
-}
-
-void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph)
+void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
+ const backend::BackendContexts &backend_contexts)
{
- TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};
-
- lowered_graph.op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
- auto &backend_ctx = lowered_graph.backend_contexts().at(lower_info->backend());
- for (auto ind : (op_seq.getInputs() + op_seq.getOutputs()) | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
+ TensorRegistries tensor_regs{backend_contexts, true};
+
+ lowered_graph.graph().operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::Operation &op) {
+ auto lower_info = lowered_graph.lower_info().operation.getRawPtr(op_ind);
+ auto &backend_ctx = backend_contexts.at(lower_info->backend());
+ for (auto ind :
+ (op.getInputs() + op.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ {
+ // If an Operation's input/output tensor does not have its own tensor object,
+ // it must be using migrant tensors, so find the tensor from other tensor registries and
+ // register it to the current tensor registry if it is portable
+ if (!backend_ctx->tensor_registry->getITensor(ind))
{
- // If an OpSequence input/output tensor does not have a own tensor object,
- // it must be using migrant tensors, so find the tensor from other tensor builders and
- // set the tensor to this tensor builder if portable
- if (!backend_ctx->tensor_registry->getITensor(ind))
- {
- auto tensor = tensor_regs.getITensor(ind);
- assert(tensor); // The tensor must have been registered
- auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
- if (ptensor)
- backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
- }
+ auto tensor = tensor_regs.getITensor(ind);
+ assert(tensor); // The tensor must have been registered
+ auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
+ if (ptensor)
+ backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
}
- });
+ }
+ });
}
exec::IExecutor *
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
- const auto &backend_contexts = lowered_graph->backend_contexts();
-
- initializeBackendContext(lowered_graph.get());
+ auto graph = lowered_graph->graph();
- TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
+ backend::BackendContexts backend_contexts =
+ createBackendContexts(*lowered_graph, options.executor == "Linear");
- assert(!lowered_graph->graph().isBuildingPhase());
+ TensorRegistries tensor_regs{backend_contexts, true};
initializeSubgraphIOTensors(
- *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
- ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
+ *lowered_graph, backend_contexts,
+ (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
// linearize
auto order = Linear::linearize(*lowered_graph);
for (auto &pair : backend_contexts)
{
- pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo());
+ pair.second->genTensors();
}
- prepareMigrantTensors(*lowered_graph);
+ prepareMigrantTensors(*lowered_graph, backend_contexts);
- // Give some runtime objects to controlflow KernelGenerator
+ // Give some runtime objects to builtin KernelGenerator
for (auto &pair : backend_contexts)
{
- auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get());
- if (cf_context != nullptr)
+ auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
+ if (builtin_context != nullptr)
{
- auto cf_kernel_gen = cf_context->kernel_gen;
- cf_kernel_gen->setTensorRegistries(tensor_regs);
- cf_kernel_gen->setExecutorMap(executor_map);
+ auto builtin_kernel_gen = builtin_context->kernel_gen;
+ builtin_kernel_gen->setTensorRegistries(tensor_regs);
+ builtin_kernel_gen->setExecutorMap(executor_map);
}
}
std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
for (auto &pair : backend_contexts)
{
- // NOTE controlflow backend must be processed lastly.
+ // NOTE The builtin backend must be processed last.
// This is because of Permute layer's specialty which is the only operation that could have
// different ITensor objects for the input and the output. And it requires all other backends'
// tensors are ready to use.
- if (pair.first->config()->id() == "controlflow")
+ if (pair.first->config()->id() == "builtin")
ordered_contexts.emplace_back(pair.first, pair.second.get());
else
ordered_contexts.emplace_front(pair.first, pair.second.get());
}
+ // Simulate execution to plan when tensors can be deallocated
+ std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map;
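+ // Walk the linearized order keeping a remaining-use count per operand; when a count reaches
+ // zero after an operation (and the operand is not a constant, variable, or model I/O), the
+ // corresponding tensor is queued for deallocation right after that operation.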
+ {
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexSequence constants;
+
+ auto model_io =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ // Prepare scanning
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ uses_map[ind] = obj.getUses().size();
+
+ if (obj.isConstant())
+ constants.append(ind);
+ });
+
+ // Treat constants as an exception: bump their use count so they are never deallocated
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ }
+
+ for (const auto op_ind : order)
+ {
+ const auto &op = graph.operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ for (const auto &ind : op_inputs)
+ {
+ const auto &operand = graph.operands().at(ind);
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0 && !operand.info().isVariable() && !model_io.contains(ind))
+ {
+ dealloc_list_map[op_ind].emplace_back(tensor_regs.getITensor(ind));
+ }
+ }
+ }
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ }
+
// Generate kernels
for (auto &pair : ordered_contexts)
{
- auto codes = pair.second->genKernels(order, lowered_graph->op_seqs());
+ auto codes = pair.second->genKernels();
for (auto &pair : codes)
{
- auto &op_seq_ind = pair.first;
+ auto &op_ind = pair.first;
auto &fn_seq = pair.second;
- auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind);
- auto lower_info = lowered_graph->getLowerInfo(op_seq_ind);
+ auto &op = lowered_graph->graph().operations().at(op_ind);
+ auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
if (options.he_profiling_mode)
fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
- builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)});
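+ // Free dynamic tensors whose last use is this operation right after it runs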
+ if (!dealloc_list_map[op_ind].empty())
+ fn_seq->append(std::make_unique<DeallocFunction>(dealloc_list_map[op_ind]));
+ builder.append(op_ind, {op_ind, &op, lower_info, std::move(fn_seq)});
}
}
auto code_map = builder.releaseCodeMap();
- auto exec = new exec::LinearExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map),
- order, options.tracing_ctx};
+ auto exec = new exec::LinearExecutor{
+ std::move(lowered_graph), std::move(backend_contexts), tensor_regs, std::move(code_map), order,
+ options.tracing_ctx};
if (!options.trace_filepath.empty())
{
std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
- options.trace_filepath, exec->graph(), options.tracing_ctx);
+ options.trace_filepath, exec->graph(), options.tracing_ctx);
exec->addObserver(std::move(ctp));
}
}
exec::IExecutor *ExecutorFactory::createDataflowExecutor(
- std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
+ const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
{
- const auto &backend_contexts = lowered_graph->backend_contexts();
+ backend::BackendContexts backend_contexts =
+ createBackendContexts(*lowered_graph, options.executor == "Linear");
- initializeBackendContext(lowered_graph.get());
-
- TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
-
- assert(!lowered_graph->graph().isBuildingPhase());
+ TensorRegistries tensor_regs{backend_contexts, true};
initializeSubgraphIOTensors(
- *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
- ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
+ *lowered_graph, backend_contexts,
+ (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
- // linearize
- // This order is just for giving topological order info to the backens
- // TODO When we pass a partial graph to a backend, we can remove this
- auto order = Linear::linearize(*lowered_graph);
for (auto &pair : backend_contexts)
{
- pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo());
+ pair.second->genTensors();
}
- prepareMigrantTensors(*lowered_graph);
+ prepareMigrantTensors(*lowered_graph, backend_contexts);
- // Give some runtime objects to controlflow KernelGenerator
+ // Give some runtime objects to builtin KernelGenerator
for (auto &pair : backend_contexts)
{
- auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get());
- if (cf_context != nullptr)
+ auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
+ if (builtin_context != nullptr)
{
- auto cf_kernel_gen = cf_context->kernel_gen;
- cf_kernel_gen->setTensorRegistries(tensor_regs);
- cf_kernel_gen->setExecutorMap(executor_map);
+ auto builtin_kernel_gen = builtin_context->kernel_gen;
+ builtin_kernel_gen->setTensorRegistries(tensor_regs);
+ builtin_kernel_gen->setExecutorMap(executor_map);
}
}
std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
for (auto &pair : backend_contexts)
{
- // NOTE controlflow backend must be processed lastly.
+ // NOTE The builtin backend must be processed last.
// This is because of Permute layer's specialty which is the only operation that could have
// different ITensor objects for the input and the output. And it requires all other backends'
// tensors are ready to use.
- if (pair.first->config()->id() == "controlflow")
+ if (pair.first->config()->id() == "builtin")
ordered_contexts.emplace_back(pair.first, pair.second.get());
else
ordered_contexts.emplace_front(pair.first, pair.second.get());
// Generate kernels
for (auto &pair : ordered_contexts)
{
- auto codes = pair.second->genKernels(order, lowered_graph->op_seqs());
+ auto codes = pair.second->genKernels();
for (auto &pair : codes)
{
- auto &op_seq_ind = pair.first;
+ auto &op_ind = pair.first;
auto &fn_seq = pair.second;
- auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind);
- auto lower_info = lowered_graph->getLowerInfo(op_seq_ind);
+ auto &op = lowered_graph->graph().operations().at(op_ind);
+ auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
if (options.he_profiling_mode)
fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
- builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)});
+ builder.append(op_ind, {op_ind, &op, lower_info, std::move(fn_seq)});
}
}
exec::ExecutorBase *exec = nullptr;
if (parallel)
{
- exec = new exec::ParallelExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map),
- options.tracing_ctx};
+ exec = new exec::ParallelExecutor{std::move(lowered_graph), std::move(backend_contexts),
+ tensor_regs, std::move(code_map), options.tracing_ctx};
}
else
{
- auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), tensor_regs,
- std::move(code_map), options.tracing_ctx};
+ auto dataflow_exec =
+ new exec::DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs,
+ std::move(code_map), options.tracing_ctx};
if (options.he_profiling_mode)
{
std::vector<const backend::Backend *> backends;
}
auto et = std::make_shared<exec::ExecTime>(backends);
std::unique_ptr<exec::IExecutionObserver> obs =
- std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
+ std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
dataflow_exec->addObserver(std::move(obs));
}
exec = dataflow_exec;
if (!options.trace_filepath.empty())
{
std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
- options.trace_filepath, exec->graph(), options.tracing_ctx);
+ options.trace_filepath, exec->graph(), options.tracing_ctx);
exec->addObserver(std::move(ctp));
}
ExecutorFactory();
private:
- static void initializeBackendContext(compiler::LoweredGraph *lowered_graph);
- static void runTensorRegistration(compiler::LoweredGraph *lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order);
- static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph);
+ static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
+ const backend::BackendContexts &backend_contexts);
static exec::IExecutor *
createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
private:
std::unordered_map<std::string, std::function<exec::IExecutor *(
- std::unique_ptr<compiler::LoweredGraph>,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
- _map;
+ std::unique_ptr<compiler::LoweredGraph>,
+ const compiler::CompilerOptions &options,
+ const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
+ _map;
};
} // namespace compiler
* limitations under the License.
*/
+#if 0 // This file is temporarily unused
+
#include "Fp32ToFp16Converter.h"
#include "ir/operation/ConvertFp32ToFp16.h"
#include "ir/operation/ConvertFp16ToFp32.h"
{
Fp32ToFp16Converter::Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph)
- : _lowered_graph{lowered_graph}
+ : _lowered_graph{lowered_graph}
{
VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl;
}
void Fp32ToFp16Converter::appendOpSequences()
{
_lowered_graph.op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
- assert(lower_info != nullptr);
-
- // For now, the only acl_cl supports fully fp16 type
- // TODO Support fp16 on acl_neon. Current acl_neon supports the only reshape and concat
- // operations.
- // To do this, we could check the support by `operation by operation`. After that, we
- // would partition an op_seq if it contains unsupported operations.
- if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
- return;
-
- // OpSeq's input set should be included in the first operation's input set or
- // OpSeq's output set should be included in the last operation's output set
- assert(checkOperandsOfOpSequence(op_seq));
-
- // Append converting OpSequence for fp16 but all operands' types are not fp16 still.
- appendNewOpSeqForConvertFp32ToFp16(op_seq_ind, op_seq);
- appendNewOpSeqForConvertFp16ToFp32(op_seq_ind, op_seq);
- });
+ [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
+ const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ assert(lower_info != nullptr);
+
+ // For now, only the acl_cl backend fully supports the fp16 type
+ // TODO Support fp16 on acl_neon. Currently acl_neon supports only the reshape and concat
+ // operations.
+ // To do this, we could check the support by `operation by operation`. After that, we
+ // would partition an op_seq if it contains unsupported operations.
+ if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
+ return;
+
+ // OpSeq's input set should be included in the first operation's input set or
+ // OpSeq's output set should be included in the last operation's output set
+ assert(checkOperandsOfOpSequence(op_seq));
+
+ // Append converting OpSequences for fp16; the operands' types are not converted to fp16 yet.
+ appendNewOpSeqForConvertFp32ToFp16(op_seq_ind, op_seq);
+ appendNewOpSeqForConvertFp16ToFp32(op_seq_ind, op_seq);
+ });
}
//
const auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
// set new lower_info for op_seq
- setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind);
+ setNewOperationLowerInfo(op_seq_ind, new_op_seq_ind);
_list_fp32_to_fp16.insert(new_op_seq_ind);
auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
// set new lower_info for op_seq
- setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind);
+ setNewOperationLowerInfo(op_seq_ind, new_op_seq_ind);
_list_fp16_to_fp32.insert(new_op_seq_ind);
void Fp32ToFp16Converter::convertOperands()
{
_lowered_graph.op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
- assert(lower_info != nullptr);
- // For now, the only acl_cl supports fully fp16
- if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
- return;
-
- // Convert input,output operands' type to fp16
- convertOperandsOfOpSequence(op_seq);
- });
+ [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
+ const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ assert(lower_info != nullptr);
+ // For now, only the acl_cl backend fully supports fp16
+ if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
+ return;
+
+ // Convert input/output operands' types to fp16
+ convertOperandsOfOpSequence(op_seq);
+ });
}
void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
obj.type(ir::DataType::FLOAT16);
- VERBOSE(Fp32ToFp16Converter) << "Input Operand #" << ind.value() << ": fp16" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Input Operand " << ind << ": fp16" << std::endl;
}
for (auto &ind : node.getOutputs())
obj.type(ir::DataType::FLOAT16);
- VERBOSE(Fp32ToFp16Converter) << "Output Operand #" << ind.value() << ": fp16" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Output Operand " << ind << ": fp16" << std::endl;
}
}
}
obj.data(std::move(new_data));
obj.type(ir::DataType::FLOAT16);
- VERBOSE(Fp32ToFp16Converter) << "Constant Operand #" << ind.value() << ": fp16" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Constant Operand " << ind << ": fp16" << std::endl;
}
});
}
{
const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
- auto new_lower_info = std::make_unique<ir::operand::LowerInfo>();
- auto permute_factor = ir::operand::PermuteFactor(lower_info->backend(), lower_info->layout());
+ auto new_lower_info = std::make_unique<compiler::OperandLowerInfo>();
+ auto permute_factor = compiler::PermuteFactor(lower_info->backend(), lower_info->layout());
new_lower_info->addDefPermuteFactor(permute_factor);
new_lower_info->addUsePermuteFactor(permute_factor);
_lowered_graph.setLowerInfo(new_op_ind, std::move(new_lower_info));
}
-void Fp32ToFp16Converter::setNewOpSequenceLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
- const ir::OpSequenceIndex &new_op_seq_ind)
+void Fp32ToFp16Converter::setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
+ const ir::OpSequenceIndex &new_op_seq_ind)
{
const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto new_lower_info =
- std::make_unique<ir::operation::LowerInfo>(lower_info->backend(), lower_info->layout());
+ std::make_unique<compiler::OperationLowerInfo>(lower_info->backend(), lower_info->layout());
_lowered_graph.setLowerInfo(new_op_seq_ind, std::move(new_lower_info));
}
auto &new_op_obj = operands.at(new_op_ind);
std::unique_ptr<ir::Operation> new_node(
- new ir::operation::ConvertFp32ToFp16({op_seq_input_ind}, {new_op_ind}));
+ new ir::operation::ConvertFp32ToFp16({op_seq_input_ind}, {new_op_ind}));
const auto new_node_ind = operations.push(std::move(new_node));
input_obj.insertUse(new_node_ind);
auto &new_op_obj = operands.at(new_op_ind);
std::unique_ptr<ir::Operation> new_node(
- new ir::operation::ConvertFp16ToFp32({new_op_ind}, {op_seq_output_ind}));
+ new ir::operation::ConvertFp16ToFp32({new_op_ind}, {op_seq_output_ind}));
const auto new_node_ind = operations.push(std::move(new_node));
new_op_obj.insertUse(new_node_ind);
opseq_map_to_delete[op_seq_ind_fp16_to_fp32].insert(op_seq_ind);
}
- VERBOSE(Fp32ToFp16Converter)
- << "Contiguous from OpSeq#" << op_seq_ind_fp16_to_fp32.value() << "(ToFp32)"
- << " to OpSeq#" << op_seq_ind.value() << "(ToFp16)" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Contiguous from " << op_seq_ind_fp16_to_fp32 << "(ToFp32)"
+ << " to " << op_seq_ind << "(ToFp16)" << std::endl;
}
}
}
}
void Fp32ToFp16Converter::manipulateContiguousOpSequences(
- const InputToOpSeqs &input_to_op_seqs, const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete)
+ const InputToOpSeqs &input_to_op_seqs, const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete)
{
auto &op_seqs = _lowered_graph.op_seqs();
}
void Fp32ToFp16Converter::deleteContiguousOpSequences(
- const OpSeqIndexList &list_to_delete_op_seqs,
- const ir::OperandIndexSequence &list_to_delete_ops)
+ const OpSeqIndexList &list_to_delete_op_seqs, const ir::OperandIndexSequence &list_to_delete_ops)
{
auto &operands = _lowered_graph.graph().operands();
auto &operations = _lowered_graph.graph().operations();
{
auto &op_seq = op_seqs.at(op_seq_ind);
assert(op_seq.size() == 1);
- VERBOSE(Fp32ToFp16Converter) << "Delete OpSeq #" << op_seq_ind.value() << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Delete OpSeq " << op_seq_ind << std::endl;
auto &first_node_ind = op_seq.operations().at(0);
auto &first_node = operations.at(first_node_ind);
assert(first_node.opcode() == ir::OpCode::ConvertFp32ToFp16 ||
first_node.opcode() == ir::OpCode::ConvertFp16ToFp32);
- VERBOSE(Fp32ToFp16Converter) << "Delete Node #" << first_node_ind.value() << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Delete Node " << first_node_ind << std::endl;
// Uses
for (auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &obj = operands.at(ind);
obj.removeUse(first_node_ind);
- VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Use(Node#"
- << first_node_ind.value() << ") is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter)
+ << "Operand " << ind << "'s Use(Node" << first_node_ind << ") is removed" << std::endl;
}
// Def
auto &obj = operands.at(ind);
assert(obj.getDef() == first_node_ind);
obj.unsetDef();
- VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Def(Node#"
- << first_node_ind.value() << ") is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter)
+ << "Operand " << ind << "'s Def(Node" << first_node_ind << ") is removed" << std::endl;
}
// Operation
operations.remove(first_node_ind);
- VERBOSE(Fp32ToFp16Converter) << "Node#" << first_node_ind.value() << " is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Node" << first_node_ind << " is removed" << std::endl;
// OpSequence
op_seqs.remove(op_seq_ind);
- VERBOSE(Fp32ToFp16Converter) << "OpSeq#" << op_seq_ind.value() << " is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "OpSeq" << op_seq_ind << " is removed" << std::endl;
}
// Operand
for (auto &ind : list_to_delete_ops)
{
operands.remove(ind);
- VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << " is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Operand " << ind << " is removed" << std::endl;
}
}
} // namespace compiler
} // namespace onert
+
+#endif
* limitations under the License.
*/
+#if 0 // This file is temporarily unused
+
#ifndef __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
#define __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
void setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
const ir::OperandIndex &new_op_ind);
- void setNewOpSequenceLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
- const ir::OpSequenceIndex &new_op_seq_ind);
+ void setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
+ const ir::OpSequenceIndex &new_op_seq_ind);
void manipulateInput(const ir::OpSequenceIndex &op_seq_ind,
const ir::OperandIndex &op_seq_input_ind,
} // namespace onert
#endif // __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
+
+#endif
void HEScheduler::scheduleShufflingBackends()
{
VERBOSE(HEScheduler::schedule)
- << "Started task scheduling: uses all backends to get more metrics for data transfer"
- << std::endl;
+ << "Started task scheduling: uses all backends to get more metrics for data transfer"
+ << std::endl;
size_t backend_ind = 0;
for (const auto &rank : _rank_to_op)
{
- VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second.value() << ")" << std::endl;
+ VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second << ")" << std::endl;
const auto &node = _graph->operations().at(rank.second);
const bool quant = isQuant(*_graph, node);
const auto size = getOperationsFlattenedIOSize(*_graph, node);
continue;
}
const auto exec_time =
- _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size);
+ _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size);
// Scheduling to measure data transfer must be done after measuring all backends separately
assert(exec_time != _exec_time->NOT_FOUND);
if (exec_time == _exec_time->getMax())
ir::OperationIndexMap<bool> visited;
graph.operations().iterate(
- [&](const ir::OperationIndex &index, const ir::Operation &) { visited[index] = false; });
+ [&](const ir::OperationIndex &index, const ir::Operation &) { visited[index] = false; });
// for each task select the backend with the smallest earliest finishing time(eft)
for (const auto &rank : _rank_to_op)
{
if (!_is_profiling_mode)
{
VERBOSE(HEScheduler::tryBackend)
- << "Trying to HE schedule while there is no profiling info for " << node.name()
- << " on backend " << backend->config()->id() << ". So this backend won't be used. "
- << std::endl;
+ << "Trying to HE schedule while there is no profiling info for " << node.name()
+ << " on backend " << backend->config()->id() << ". So this backend won't be used. "
+ << std::endl;
_is_supported[backend][node.name()] = false;
return _exec_time->getMax();
}
VERBOSE(HEScheduler::makeRank) << "task prioritizing" << std::endl;
_graph->operations().iterate(
- [&](const ir::OperationIndex &index, const ir::Operation &) { DFSMaxRank(index); });
+ [&](const ir::OperationIndex &index, const ir::Operation &) { DFSMaxRank(index); });
// Check that ranks are calculated for all operations(nodes)
_graph->operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
assert(rank >= 0);
_rank_to_op.emplace(rank, index);
_op_to_rank->emplace(index, rank);
- VERBOSE(HEScheduler::DFSMaxRank) << "rank of operation (" << index.value() << ")" << node.name()
- << " is " << rank << std::endl;
+ VERBOSE(HEScheduler::DFSMaxRank)
+ << "rank of operation (" << index << ")" << node.name() << " is " << rank << std::endl;
return rank;
}
{
continue;
}
- // TODO Change it to controlflow backend
+ // TODO Change it to builtin backend
auto transfer_cost =
- getPermuteTime(backend, other_backend, quant, operand.info().total_size());
+ getPermuteTime(backend, other_backend, quant, operand.info().total_size());
avg_transfer_cost += transfer_cost;
}
}
bool HEScheduler::schedule(const ir::OperationIndex &index, const backend::Backend *parent_backend)
{
- VERBOSE(HEScheduler::schedule) << "scheduling (" << index.value() << ")" << std::endl;
+ VERBOSE(HEScheduler::schedule) << "scheduling (" << index << ")" << std::endl;
int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0;
const auto &node = _graph->operations().at(index);
if (!_is_parallel_exec)
{
VERBOSE(HEScheduler::ESTAndExecTime)
- << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
- << backend->config()->id() << " is " << exec_time
- << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
+ << "exec_time of (" << index << ") " << node.name() << " quant==" << quant << " on "
+ << backend->config()->id() << " is " << exec_time
+ << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
return {total_transfer_cost, exec_time};
}
VERBOSE(HEScheduler::ESTAndExecTime)
- << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
- << backend->config()->id() << ": " << exec_time
- << " microseconds. Backend available time: " << prev_op_ft
- << " Parent's max eft: " << max_pred_eft - total_transfer_cost
- << " data transfer cost: " << total_transfer_cost << std::endl;
+ << "exec_time of (" << index << ") " << node.name() << " quant==" << quant << " on "
+ << backend->config()->id() << ": " << exec_time
+ << " microseconds. Backend available time: " << prev_op_ft
+ << " Parent's max eft: " << max_pred_eft - total_transfer_cost
+ << " data transfer cost: " << total_transfer_cost << std::endl;
return {prev_op_ft, exec_time};
}
{
// Multiply operand size by 2 because size must describe input+output size
int64_t transfer_cost =
- getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
+ getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost);
}
}
* @param[in] model Graph model
* @param[in] backend_resolver backend resolver
*/
- HEScheduler(const backend::BackendContexts &backend_contexts, const CompilerOptions &options)
- : _is_supported{}, _backends_avail_time{}, _ops_eft{},
- _op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()},
- _is_profiling_mode{options.he_profiling_mode},
- _is_linear_exec{options.executor == "Linear"},
- _is_parallel_exec{options.executor == "Parallel"}
+ HEScheduler(const std::vector<const backend::Backend *> &backends, const CompilerOptions &options)
+ : _is_supported{}, _backends_avail_time{}, _ops_eft{},
+ _op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()},
+ _is_profiling_mode{options.he_profiling_mode}, _is_linear_exec{options.executor == "Linear"},
+ _is_parallel_exec{options.executor == "Parallel"}
{
- for (auto &entry : backend_contexts)
+ for (auto entry : backends)
{
- if (entry.first->config()->id() == backend::controlflow::Config::ID)
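+ // Skip the builtin backend: it is not a candidate for HE scheduling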
+ if (entry->config()->id() == backend::builtin::Config::ID)
continue;
- _all_backends.push_back(entry.first);
+ _all_backends.push_back(entry);
}
_backend_resolver = std::make_unique<compiler::BackendResolver>();
_exec_time = std::make_unique<exec::ExecTime>(_all_backends);
// Find cpu backend
- auto cpu_backend_it = std::find_if(
- _all_backends.begin(), _all_backends.end(),
- [](const backend::Backend *backend) { return backend->config()->id() == "cpu"; });
+ auto cpu_backend_it =
+ std::find_if(_all_backends.begin(), _all_backends.end(), [](const backend::Backend *backend) {
+ return backend->config()->id() == "cpu";
+ });
if (cpu_backend_it == _all_backends.end())
throw std::runtime_error("HEScheduler could be used only if 'cpu' backend is available");
_cpu_backend = *cpu_backend_it;
std::unique_ptr<exec::ExecTime> _exec_time;
const ir::Graph *_graph{nullptr};
std::vector<const backend::Backend *> _all_backends;
- const backend::Backend *_cpu_backend{nullptr}; // TODO Change this to controlflow_backend
+ const backend::Backend *_cpu_backend{nullptr}; // TODO Change this to _builtin_backend
bool _is_profiling_mode;
bool _is_linear_exec;
bool _is_parallel_exec;
*/
#include <algorithm>
+#include <sstream>
#include "Linear.h"
#include "backend/IConfig.h"
#include "backend/Backend.h"
#include "util/logging.h"
+#include "dumper/text/GraphDumper.h"
namespace onert
{
namespace compiler
{
-std::vector<ir::OpSequenceIndex> Linear::linearize(const compiler::LoweredGraph &lowered_graph)
+// TODO(easy) Change the LoweredGraph param to Graph
+std::vector<ir::OperationIndex> Linear::linearize(const compiler::LoweredGraph &lowered_graph)
{
- std::vector<ir::OpSequenceIndex> order;
- lowered_graph.iterateTopolOpSeqs(
- [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) -> void {
- order.emplace_back(index);
- });
- return order;
+ return lowered_graph.graph().topolSortOperations();
}
+// TODO(easy) Change the LoweredGraph param to Graph
void Linear::dump(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order)
+ const std::vector<ir::OperationIndex> &order)
{
+ for (const auto ind : order)
{
- const auto &toString = [](const onert::backend::Backend *backend) {
- assert(backend);
- std::string str;
- str += backend->config()->id();
- return "{" + str + "}";
- };
-
- VERBOSE(Linear) << "Final OpSequence" << std::endl;
- for (const auto index : order)
- {
- const auto &op_seq = lowered_graph.op_seqs().at(index);
- const auto lower_info = lowered_graph.getLowerInfo(index);
- const auto &operations = lowered_graph.graph().operations();
- VERBOSE(Linear) << "* OP_SEQ " << toString(lower_info->backend()) << " "
- << ir::getStrFromOpSeq(op_seq, operations) << std::endl;
- }
+ // TODO Could the logging system handle this? (inserting a prefix for each line)
+ std::istringstream iss{dumper::text::formatOperation(lowered_graph.graph(), ind)};
+ std::string line;
+ while (std::getline(iss, line))
+ VERBOSE(GraphDumper) << line << std::endl;
}
}
#include <vector>
#include <memory>
-#include "ir/OpSequences.h"
#include "ir/Index.h"
#include "compiler/LoweredGraph.h"
-namespace onert
-{
-namespace ir
-{
-struct OperationVisitor;
-} // namespace ir
-} // namespace onert
-
namespace onert
{
namespace compiler
class Linear
{
public:
- static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph);
+ static std::vector<ir::OperationIndex> linearize(const compiler::LoweredGraph &lowered_graph);
static void dump(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order);
+ const std::vector<ir::OperationIndex> &order);
};
} // namespace compiler
#include "compiler/LoweredGraph.h"
#include <assert.h>
+#include <algorithm>
#include <sstream>
#include "util/logging.h"
#include "compiler/pass/ConstantInsertionPass.h"
#include "compiler/pass/PermutationOperationPass.h"
#include "compiler/pass/PermutationInsertionPass.h"
#include "compiler/pass/PermutationEliminationPass.h"
-#include "ir/GraphIterator.h"
+#include "dumper/text/GraphDumper.h"
#include "ir/verifier/Verifier.h"
#include "backend/Backend.h"
#include "backend/IConfig.h"
options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value());
}
- bool linear_executor = (options.executor == "Linear");
-
// Build backend contexts
auto &backend_manager = BackendManager::get();
-
- // Always create Controlflow backend context
- auto cf_backend = backend_manager.getControlflow();
- _backend_contexts.emplace(
- cf_backend, cf_backend->newContext(_graph, _graph.getKernelBuilder(), linear_executor));
-
// Create contexts for other backends
for (auto backend_str : options.backend_list)
{
VERBOSE(LoweredGraph) << "Cannot load backend - " << backend_str << std::endl;
continue;
}
-
- _backend_contexts.emplace(
- backend, backend->newContext(_graph, _graph.getKernelBuilder(), linear_executor));
}
if (backend_manager.num_backends() == 0)
throw std::runtime_error{"No available backends loaded."};
// TODO Move "schedule" phase out of here
// Schedule
std::unique_ptr<BackendResolver> backend_resolver;
+ auto all_backends = backend_manager.getAll();
if (options.he_scheduler)
{
- auto scheduler = HEScheduler(_backend_contexts, options);
+ auto scheduler = HEScheduler(all_backends, options);
backend_resolver = scheduler.schedule(_graph);
_indexed_ranks = scheduler.getIndexedRanks();
}
else
{
- auto scheduler = ManualScheduler(_backend_contexts, options);
+ auto scheduler = ManualScheduler(all_backends, options);
backend_resolver = scheduler.schedule(_graph);
}
- {
- // operand::LowerInfo holder
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> operands_lower_info;
-
- _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
- operands_lower_info[index] = std::make_unique<ir::operand::LowerInfo>();
- });
-
- // Make op_seqs while checking whether a node can be merged into a op_seq.
- makeOpSequences(operands_lower_info, options, *backend_resolver);
-
- _op_seqs.iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- assert(op_seq.operations().size() > 0);
- std::reverse(std::begin(op_seq.operations()), std::end(op_seq.operations()));
- });
+ makeLowerInfo(*backend_resolver);
+ VERBOSE(LoweredGraph) << "Dump before mandatory passes" << std::endl;
+ dumper::text::dumpLoweredGraph(*this);
- VERBOSE(OpSequences) << "dump before permutation insertion" << std::endl;
- dumpOpSequences(_op_seqs, _graph.operations());
-
- // Mandatory passes
- pass::PassRunner{}
- .append(std::make_unique<pass::ConstantInsertionPass>(*this))
- .append(std::make_unique<pass::ConstantLoweringPass>(*this))
- .run();
-
- // Set LowerInfo for each operand from the operand::LowerInfo holder
- manipulateLowerInfo(operands_lower_info);
-
- dumpLowerInfo();
- }
-
- // Mandatory passes
+ // Mandatory passes (roughly, a legalization step)
pass::PassRunner{}
- .append(std::make_unique<pass::PermutationOperationPass>(*this))
- .append(std::make_unique<pass::PermutationInsertionPass>(*this))
- .run();
+ .append(std::make_unique<pass::ConstantInsertionPass>(*this))
+ .append(std::make_unique<pass::ConstantLoweringPass>(*this))
+ .append(std::make_unique<pass::PermutationOperationPass>(*this))
+ .append(std::make_unique<pass::PermutationInsertionPass>(*this))
+ .run();
+
+ dumpLowerInfo();
- // Optimization passes
+ // Optimization passes (optional)
pass::PassRunner{}.append(std::make_unique<pass::PermutationEliminationPass>(*this)).run();
- VERBOSE(LoweredGraph) << "Dump after permutation insertion" << std::endl;
+ VERBOSE(LoweredGraph) << "Dump after all the passes" << std::endl;
for (auto operand : _graph.getInputs())
VERBOSE(LoweredGraph) << "Graph Input : " << operand << std::endl;
for (auto operand : _graph.getOutputs())
VERBOSE(LoweredGraph) << "Graph Output : " << operand << std::endl;
- dumpOpSequences(_op_seqs, _graph.operations());
+ dumper::text::dumpLoweredGraph(*this);
// Graph verifications
{
assert(ir::verifier::InputOutputChecker().verify(_graph));
assert(ir::verifier::DAGChecker().verify(_graph));
- assert(ir::verifier::EdgeConsistencyChecker().verify(_graph));
+ assert(ir::verifier::EdgeChecker().verify(_graph));
}
}
-const ir::operation::LowerInfo *
-LoweredGraph::getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const
-{
- auto itr = _lower_info_map.op_seq.find(op_seq_index);
- if (itr == _lower_info_map.op_seq.end())
- return nullptr;
- return itr->second.get();
-}
-
-void LoweredGraph::setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
- std::unique_ptr<ir::operation::LowerInfo> &&lower_info)
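+// Build operand/operation LowerInfo from the backend assignment held by the given BackendResolver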
+void LoweredGraph::makeLowerInfo(const compiler::BackendResolver &backend_resolver)
{
- _lower_info_map.op_seq.insert(std::make_pair(op_seq_index, std::move(lower_info)));
-}
+ _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ lower_info().operand.set(index, std::make_unique<OperandLowerInfo>());
+ });
-void LoweredGraph::removeLowerInfo(const ir::OpSequenceIndex &op_seq_index)
-{
- auto &op_seq_lower_info = _lower_info_map.op_seq;
- assert(op_seq_lower_info.find(op_seq_index) != op_seq_lower_info.end());
- for (auto it = op_seq_lower_info.begin(); it != op_seq_lower_info.end(); ++it)
- {
- if (it->first == op_seq_index)
+ // Set operand LowerInfo using the backends assigned to each operation
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, const ir::Operation &op) {
+ auto backend = backend_resolver.getBackend(op_ind);
+ if (!backend)
{
- op_seq_lower_info.erase(it);
- break;
+ throw std::runtime_error{"Failed to find backend for " + op.name() + " operation"};
}
- }
-}
-
-const ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index) const
-{
- auto itr = _lower_info_map.operand.find(index);
- if (itr == _lower_info_map.operand.end())
- return nullptr;
- return itr->second.get();
-}
-
-ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index)
-{
- auto itr = _lower_info_map.operand.find(index);
- if (itr == _lower_info_map.operand.end())
- return nullptr;
- return itr->second.get();
-}
-
-void LoweredGraph::setLowerInfo(const ir::OperandIndex &index,
- std::unique_ptr<ir::operand::LowerInfo> &&lower_info)
-{
- _lower_info_map.operand.insert(std::make_pair(index, std::move(lower_info)));
-}
-
-void LoweredGraph::removeLowerInfo(const ir::OperandIndex &index)
-{
- _lower_info_map.operand.erase(index);
-}
-
-void LoweredGraph::iterateTopolOpSeqs(
- const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const
-{
- // Topological Sorting for ir::OpSequences
- std::vector<ir::OpSequenceIndex> topol_sorted;
- ir::PostDfsIterator<true>{}.iterateOpSeqs(
- *this, [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) {
- topol_sorted.emplace_back(index);
- });
- std::reverse(topol_sorted.begin(), topol_sorted.end());
- for (const auto op_seq_idx : topol_sorted)
- {
- const auto &op_seq = _op_seqs.at(op_seq_idx);
- fn(op_seq_idx, op_seq);
- }
-}
-
-void LoweredGraph::iterateTopolOpSeqs(
- const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn)
-{
- // Topological Sorting for ir::OpSequences
- std::vector<ir::OpSequenceIndex> topol_sorted;
- ir::PostDfsIterator<false>{}.iterateOpSeqs(
- *this, [&](const ir::OpSequenceIndex &index, ir::OpSequence &) {
- topol_sorted.emplace_back(index);
- });
- std::reverse(topol_sorted.begin(), topol_sorted.end());
- for (const auto op_seq_idx : topol_sorted)
- {
- auto &op_seq = _op_seqs.at(op_seq_idx);
- fn(op_seq_idx, op_seq);
- }
-}
-
-ir::OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
- const ir::Operation &node)
-{
- // Create a fresh op_seq with one operation, and append it to op_seqs
- // Create a fresh op_seq
- auto op_seq = std::make_unique<ir::OpSequence>(_graph.layout());
-
- // Add an operation
- op_seq->appendOperation(node_index);
-
- // Update input/output
- op_seq->setOutputs(node.getOutputs());
- op_seq->setInputs(node.getInputs());
-
- return _op_seqs.emplace(std::move(op_seq));
-}
-
-void LoweredGraph::makeOpSequences(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
- const CompilerOptions &options, const BackendResolver &backend_resolver)
-{
- // if SUBG_MAX_NODE == 0, no limit on nodes of a op_seq
- const int op_seq_max_node = options.op_seq_max_node;
- assert(op_seq_max_node >= 0);
-
- bool is_profiling = options.he_profiling_mode;
- ir::OpSequence *op_seq = nullptr;
- ir::OpSequenceIndex op_seq_index;
-
- // NOTE: The below method appends nodes while making one op_seq if needed. If something better
- // ways, happy to update this code.
- ir::PostDfsConstIterator{}.iterate(
- _graph, [&](const ir::OperationIndex &node_index, const ir::Operation &node) {
- // LowerInfo for in/output operands
- auto backend = backend_resolver.getBackend(node_index);
-
- // Get frontend's layout
- auto frontend_layout = _graph.layout();
-
- // The layout of each backend should be set at another place
- // TODO Change setting layout of each backend at another place
- auto backend_layout = backend->config()->supportLayout(node, frontend_layout);
-
- for (auto operand : node.getInputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addUsePermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
- }
- for (auto operand : node.getOutputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
- }
-
- bool new_op_seq = (op_seq == nullptr ||
- (op_seq_max_node != 0 &&
- op_seq->operations().size() >= static_cast<size_t>(op_seq_max_node)));
- // for profiling each op_seq must contain just one node,
- // so that we can measure a node separately
- if (new_op_seq || is_profiling ||
- !mergeable(op_seq_index, node_index, backend_layout, backend_resolver))
- {
- auto new_op_seq_index = appendFreshSingleOpSequence(node_index, node);
-
- // ir::OpSequence LowerInfo
- setLowerInfo(new_op_seq_index,
- std::make_unique<ir::operation::LowerInfo>(backend, backend_layout));
-
- op_seq_index = new_op_seq_index;
- op_seq = &(_op_seqs.at(new_op_seq_index));
-
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " is created for "
- << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl;
- }
- else
- {
- op_seq->appendOperation(node_index);
- // Set inputs
- auto new_inputs = node.getInputs();
- // Add inputs except outputs of the previous node
- for (auto ind : op_seq->getInputs())
- {
- if (!node.getOutputs().contains(ind))
- new_inputs.append(ind);
- }
- op_seq->setInputs(new_inputs);
+ auto frontend_layout = _graph.layout();
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " merges "
- << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl;
- }
- });
-}
+ // The layout of each backend should be set at another place
+ // TODO Change setting layout of each backend at another place
+ auto backend_layout = backend->config()->supportLayout(op, frontend_layout);
-void LoweredGraph::manipulateLowerInfo(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info)
-{
- const auto controlflow_backend = BackendManager::get().getControlflow();
+ for (auto ind : op.getInputs() | ir::Remove::UNDEFINED)
+ {
+ auto &operand_li = lower_info().operand.at(ind);
+ operand_li.addUsePermuteFactor(PermuteFactor{backend, backend_layout});
+ }
+ for (auto ind : op.getOutputs() | ir::Remove::UNDEFINED)
+ {
+ auto &operand_li = lower_info().operand.at(ind);
+ operand_li.addDefPermuteFactor(PermuteFactor{backend, backend_layout});
+ }
+ lower_info().operation.set(
+ op_ind, std::make_unique<compiler::OperationLowerInfo>(backend, backend_layout));
+ });
- // TODO Rather than using NHWC Get frontend layout of this node from IR
- auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC};
+ // Handle graph inputs and outputs
+ const auto builtin_backend = BackendManager::get().getBuiltin();
+ auto factor = PermuteFactor{builtin_backend, _graph.layout()};
for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
{
- auto &&lower_info = operands_lower_info.at(index);
- assert(lower_info->def_factors().empty());
- lower_info->addDefPermuteFactor(factor);
- }
- for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(index);
- lower_info->addUsePermuteFactor(factor);
+ auto &operand_li = lower_info().operand.at(index);
+ assert(operand_li.def_factors().empty());
+ operand_li.addDefPermuteFactor(factor);
}
for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED)
{
- auto &&lower_info = operands_lower_info.at(index);
- if (lower_info->def_factors().size() == 0)
- {
- // In case of that an operand is Graph's output and not input or output of any operation
- lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
- controlflow_backend,
- ir::Layout::NHWC // TODO Get frontend layout of this node from IR
- });
- }
+ auto &operand_li = lower_info().operand.at(index);
+ operand_li.addUsePermuteFactor(factor);
}
- // 1. Add def of variable operand
- // 2. Set LowerInfo for each operand from the operand::LowerInfo holder
+ // Handle variable tensors
_graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &operand) {
// Some inputs of an operation could be non-constant, but not existed in graph inputs/outputs
- // and not undefined operand. Those inputs must have exist as a Tensor. For example,
- // UnidirectionalSequenceLSTM operation could have state inputs such as it.
+ // and not undefined operand - these are variable tensors. For example,
+ // UnidirectionalSequenceLSTM has such inputs.
if (operand.info().isVariable())
{
// The variable operand with buffer is not supported yet
assert(operand.data() == nullptr);
assert(operand.getUses().size() == 1 && !operand.getDef().valid());
- auto &lowered_info = operands_lower_info[index];
- assert(lowered_info->def_factors().empty());
- lowered_info->addDefPermuteFactor(lowered_info->use_factors().getOnlyElement());
+ auto &operand_li = lower_info().operand.at(index);
+ assert(operand_li.def_factors().empty());
+ operand_li.addDefPermuteFactor(operand_li.use_factors().getOnlyElement());
}
-
- setLowerInfo(index, std::move(operands_lower_info[index]));
});
}
std::map<uint32_t, std::string> dumps;
_graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) {
- std::stringstream sstream;
- if (!getLowerInfo(index)->def_factors().empty() || !getLowerInfo(index)->use_factors().empty())
+ const auto operand_lower_info = lower_info().operand.getRawPtr(index);
+ assert(operand_lower_info);
+ if (!operand_lower_info->def_factors().empty() || !operand_lower_info->use_factors().empty())
{
- auto factors_to_string = [](const ir::operand::PermuteFactorSet &factors) {
+ auto shape_to_string = [](const ir::Shape &shape) {
+ std::stringstream sstream;
+ sstream << "{ ";
+ for (auto i = 0; i < shape.rank(); ++i)
+ sstream << (shape.dim(i)) << " ";
+ sstream << "}";
+ return sstream.str();
+ };
+
+ auto factors_to_string = [](const PermuteFactorSet &factors) {
std::string str;
for (auto factor : factors)
{
return "{ " + str + "}";
};
- auto operation_index_to_string = [](const ir::OperationIndexSet &operations) {
- std::string str;
+ auto operation_index_set_to_string = [](const ir::OperationIndexSet &operations) {
+ std::stringstream sstream;
+ sstream << "{ ";
for (auto op : operations)
- {
- str += std::to_string(op.value());
- str += " ";
- }
- return "{ " + str + "}";
+ sstream << op << " ";
+ sstream << "}";
+ return sstream.str();
+ };
+
+ auto data_to_str = [](const ir::Data *data) {
+ return (data ? (std::to_string(data->size()) + " bytes") : "N/A");
};
- const auto lower_info = getLowerInfo(index);
- const auto &shape = object.shape();
- std::string def_ops =
- object.getDef().valid() ? std::to_string(object.getDef().value()) : "N/A";
- std::string use_ops = operation_index_to_string(object.getUses());
- std::string def_layouts = factors_to_string(lower_info->def_factors());
- std::string use_layouts = factors_to_string(lower_info->use_factors());
- sstream << "Operand #" << index.value() << " LowerInfo" << std::endl;
- sstream << " - Shape : { ";
- for (auto i = 0; i < shape.rank(); ++i)
- {
- sstream << (shape.dim(i)) << " ";
- }
- sstream << "}" << std::endl;
- sstream << " - Def Operations : " << def_ops << std::endl;
- sstream << " - Use Operations : " << use_ops << std::endl;
- sstream << " - Data : "
- << (object.data() ? (std::to_string(object.data()->size()) + " bytes") : "N/A")
- << std::endl;
- sstream << " - Lower Info" << std::endl;
- sstream << " - Def Backends : " << def_layouts << std::endl;
- sstream << " - Use Backends : " << use_layouts << std::endl;
+ std::string shape_str = shape_to_string(object.shape());
+ std::string def_op = operation_index_set_to_string({object.getDef()});
+ std::string use_ops = operation_index_set_to_string(object.getUses());
+ std::string def_factors = factors_to_string(operand_lower_info->def_factors());
+ std::string use_factors = factors_to_string(operand_lower_info->use_factors());
+ std::stringstream sstream;
+ sstream << "Operand " << index << " Info" << std::endl;
+ sstream << " - Shape : " << shape_str << std::endl;
+ sstream << " - Def/Uses : Def " << def_op << " Uses " << use_ops << std::endl;
+ sstream << " - Data : " << data_to_str(object.data()) << std::endl;
+ sstream << " - LowerInfo : Def " << def_factors << " Uses " << use_factors << std::endl;
+ dumps.emplace(index.value(), sstream.str());
}
- dumps.emplace(index.value(), sstream.str());
});
for (const auto &e : dumps)
{
if (!e.second.empty())
{
- VERBOSE(Lower) << e.second;
- }
- }
-}
-
-bool LoweredGraph::mergeable(const ir::OpSequenceIndex &op_seq_index,
- const ir::OperationIndex &node_index, ir::Layout layout,
- const BackendResolver &backend_resolver)
-{
- // Are they mergeable?
- // 1. the same backend id and layout?
- // 2. Is op_seq or node branched?
- // 3. if 1 is true, the op_seq and a node are connected?
- const auto &op_seq = _op_seqs.at(op_seq_index);
- const auto &node = _graph.operations().at(node_index);
-
- // The same backend id and layout?
- {
- const auto op_seq_backend_layout = getLowerInfo(op_seq_index)->layout();
- const auto &op_seq_backend_id = getLowerInfo(op_seq_index)->backend()->config()->id();
- const auto &node_backend_id = backend_resolver.getBackend(node_index)->config()->id();
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " { " << op_seq_backend_id << "("
- << to_string(op_seq_backend_layout) << ") } "
- << " NODE#" << node_index.value() << " (" << node.name() << ") { "
- << node_backend_id << "(" << to_string(layout) << ") } " << std::endl;
- if (op_seq_backend_id != node_backend_id || op_seq_backend_layout != layout)
- return false;
- }
-
- // Branched?
- {
- std::unordered_set<ir::OperationIndex> branched_set;
-
- // Check for branching up
- for (const auto &input : op_seq.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
- {
- const auto &input_obj = _graph.operands().at(input);
- auto def = input_obj.getDef();
- if (def.valid())
- {
- branched_set.insert(def);
- if (branched_set.size() > 1)
- {
- return false;
- }
- }
- }
- branched_set.clear();
-
- // Check for branching down
- for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
- {
- // TODO Fix this workaround for the case of model outputs that are used by another operation
- // This is needed since the branching is decided by operation, but for model outputs,
- // there is controlflow backen(use backend) but no actual use operation exists
- if (_graph.getOutputs().contains(output))
- return false;
-
- const auto &output_obj = _graph.operands().at(output);
- for (const auto &use : output_obj.getUses())
- {
- branched_set.insert(use);
- if (branched_set.size() > 1)
- {
- return false;
- }
- }
- }
- }
-
- // Connected?
- // an input of one node is an output of the other node? or vice-versa?
- {
- const auto &node_inputs = node.getInputs();
- const auto &node_outputs = node.getOutputs();
-
- // op_seq's operations are in order so that we just check the first and the last
- std::vector<ir::OperationIndex> op_seq_ops{op_seq.operations()[0]};
- if (op_seq.operations().size() > 1)
- op_seq_ops.emplace_back(op_seq.operations()[op_seq.operations().size() - 1]);
-
- for (const auto &n_index : op_seq_ops)
- {
- const auto &n = _graph.operations().at(n_index);
-
- // node's output == op_seq's input?
- for (const auto input : n.getInputs() | ir::Remove::UNDEFINED)
- {
- if (node_outputs.contains(input))
- {
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " 's NODE#" << n_index.value()
- << "(" << n.name() << ") is connected to NODE#" << node_index.value()
- << "(" << node.name() << ")" << std::endl;
- return true;
- }
- }
-
- // node's input == op_seq's output?
- for (const auto output : n.getOutputs() | ir::Remove::UNDEFINED)
- {
- if (node_inputs.contains(output))
- {
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " 's NODE#" << n_index.value()
- << " (" << n.name() << ") is connected to NODE#" << node_index.value()
- << std::endl;
- return true;
- }
- }
+ std::istringstream iss(e.second);
+ std::string line;
+ while (std::getline(iss, line))
+ VERBOSE(Lower) << line << std::endl;
}
-
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " is not connected to NODE#"
- << node_index.value() << "(" << node.name() << ")" << std::endl;
}
-
- return false;
}
} // namespace compiler
namespace compiler
{
-ManualScheduler::ManualScheduler(const backend::BackendContexts &backend_contexts,
+ManualScheduler::ManualScheduler(const std::vector<const backend::Backend *> &backends,
const compiler::CompilerOptions &options)
- : _backend_contexts{backend_contexts}, _options{options}
+ : _backends{backends}, _options{options}
{
}
try
{
graph.operations().at(key); // Check if exist, or this will throw
- backend_resolver->setBackend(
- key, BackendManager::get().get(
- val)); // TODO Ensure this backend is available in backend contexts
+ backend_resolver->setBackend(key, BackendManager::get().get(val));
}
catch (...)
{
- VERBOSE(ManualScheduler) << "Invalid value while OperationIndex to Backend mapping : @"
- << key.value() << " -> \"" << val << "\"" << std::endl;
+ VERBOSE(ManualScheduler) << "Invalid value in OperationIndex-to-Backend mapping : @" << key
+ << " -> \"" << val << "\"" << std::endl;
}
}
// Dump final assignment
WHEN_LOG_ENABLED(backend_resolver->iterate(
- [&](const ir::OperationIndex &index, const backend::Backend &backend) {
- VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": "
- << backend.config()->id() << std::endl;
- }));
+ [&](const ir::OperationIndex &index, const backend::Backend &backend) {
+ VERBOSE(ManualScheduler) << "backend for " << index << ": " << backend.config()->id()
+ << std::endl;
+ }));
return backend_resolver;
}
{
// Ensure if the backend is available in the current backend context
const backend::Backend *backend = BackendManager::get().get(id);
- if (!backend || _backend_contexts.find(backend) == _backend_contexts.end())
+ if (!backend || std::find(_backends.begin(), _backends.end(), backend) == _backends.end())
{
backend = fallback;
}
class ManualScheduler : public IScheduler
{
public:
- ManualScheduler(const backend::BackendContexts &backend_contexts,
+ ManualScheduler(const std::vector<const backend::Backend *> &backends,
const compiler::CompilerOptions &options);
std::unique_ptr<BackendResolver> schedule(const ir::Graph &graph) override;
const backend::Backend *fallback = nullptr);
private:
- const backend::BackendContexts &_backend_contexts;
+ std::vector<const backend::Backend *> _backends;
compiler::CompilerOptions _options;
};
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/OperationLowerInfo.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+OperationLowerInfo::OperationLowerInfo(const backend::Backend *backend, ir::Layout layout)
+ : _permute_factor{backend, layout}
+{
+ // DO NOTHING
+}
+
+} // namespace compiler
+} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ParamChecker.h"
-
-#include "ir/Graph.h"
-
-namespace onert
-{
-namespace compiler
-{
-
-void ParamChecker::operator()()
-{
- _model->operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
-}
-
-} // namespace compiler
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file ParamChecker.h
- * @brief This file contains ParamChecker to check\n
- * operations' parameters are compilable at machine independent phase\n
- * ex) Check param is constant
- */
-#ifndef __ONERT_COMPILER_PARAM_CHECKER_H__
-#define __ONERT_COMPILER_PARAM_CHECKER_H__
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-class Graph;
-} // namespace ir
-} // namespace onert
-
-namespace onert
-{
-namespace compiler
-{
-
-class ParamChecker : public ir::OperationVisitor
-{
-public:
- /**
- * @brief Construct a new Param Checker object (deleted)
- */
- ParamChecker(void) = delete;
- /**
- * @brief Construct a new Param Checker object
- * @param[in] model Graph model to check
- */
- ParamChecker(std::shared_ptr<ir::Graph> model) : _model{model} {}
-
-public:
- /**
- * @brief Run parameter analysis
- */
- void operator()();
- /**
- * @brief Return analysis result if model have non-const parameter
- * @return @c true if there is non-const parameter, otherwise @c false
- */
- bool haveNoneConstParam(void) { return _nonConstParam; }
-
-private:
- const std::shared_ptr<ir::Graph> _model;
- bool _nonConstParam{false};
-};
-
-} // namespace compiler
-} // namespace onert
-
-#endif // __ONERT_COMPILER_OPERATION_VALIDATOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/PermuteFactor.h"
+
+#include <assert.h>
+#include <ostream>
+
+#include "backend/Backend.h"
+
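+// Make a PermuteFactor printable as "(backend-id/layout)" for logging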
+std::ostream &operator<<(std::ostream &os, const onert::compiler::PermuteFactor &obj)
+{
+ assert(obj.backend() && obj.backend()->config());
+ return os << "(" << obj.backend()->config()->id() << "/" << to_string(obj.layout()) << ")";
+}
#include <typeinfo>
#include "ir/Graph.h"
-#include "ir/operation/LowerInfo.h"
-
#include "util/logging.h"
#include "util/Utils.h"
{
ShapeValidator::ShapeValidator(const ir::Graph &graph)
- : _graph{graph}, _ctx{graph.operands()}, _current_layout{ir::Layout::UNKNOWN}
+ : _graph{graph}, _ctx{graph.operands()}, _current_layout{ir::Layout::UNKNOWN}
{
}
_current_layout = _graph.layout();
_graph.operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
+ [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
}
void ShapeValidator::visit(const ir::operation::BatchMatMul &node)
const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
const auto frontend_layout = _current_layout;
const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
const auto ifm_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
const auto weight_scales_index{
- node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_SCALES)};
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_SCALES)};
const auto weight_binary_index{
- node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_BINARY)};
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_BINARY)};
const auto weight_cluster_index{
- node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
// const auto bias_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::BIAS)};
OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 2);
const auto input_binary_index{node.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
const auto input_scales_index{node.getInputs().at(ir::operation::BCQGather::Input::INPUT_SCALES)};
const auto input_clusters_index{
- node.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
+ node.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
OP_REQUIRES(_ctx.at(indices_index).shape().rank() <= 2); // TODO : support rank up to 4 or more
OP_REQUIRES(_ctx.at(input_binary_index).shape().rank() == 2);
{
// Reducing C or
// (Reducing H and C(input and output) == 1) or (Reducing W and C(input and output) == 1)
- OP_REQUIRES((input_shape.dim(0) == output_shape.dim(0) &&
- input_shape.dim(1) == output_shape.dim(1) &&
- input_shape.dim(2) == output_shape.dim(2)) ||
- (input_shape.dim(0) == output_shape.dim(0) &&
- (input_shape.dim(1) == output_shape.dim(1) ||
- input_shape.dim(2) == output_shape.dim(1)) &&
- input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
+ OP_REQUIRES(
+ (input_shape.dim(0) == output_shape.dim(0) && input_shape.dim(1) == output_shape.dim(1) &&
+ input_shape.dim(2) == output_shape.dim(2)) ||
+ (input_shape.dim(0) == output_shape.dim(0) &&
+ (input_shape.dim(1) == output_shape.dim(1) || input_shape.dim(2) == output_shape.dim(1)) &&
+ input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
}
}
}
return;
const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
+ node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
+ node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
const auto frontend_layout = _current_layout;
return;
const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; // Optional
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; // Optional
const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; // Optional
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; // Optional
const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; // Optional
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; // Optional
const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // Optional
const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // Optional
const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // Optional
const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // Optional
const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // Optional
const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; // Optional
const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // Optional
const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // Optional
const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
OP_REQUIRES(_ctx.at(input_index).shape().dim(i) == _ctx.at(output_index).shape().dim(i));
}
OP_REQUIRES(
- (_ctx.at(output_index).shape().rank() == 2 || _ctx.at(output_index).shape().rank() == 3) &&
- (_ctx.at(input_index).shape().rank() == 2 || _ctx.at(input_index).shape().rank() == 3) &&
- (!_ctx.exist(input_to_input_weights_index) ||
- _ctx.at(input_to_input_weights_index).shape().rank() == 2) &&
- _ctx.at(input_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_output_weights_index).shape().rank() == 2 &&
- (!_ctx.exist(recurrent_to_input_weights_index) ||
- _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2) &&
- _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
- (!_ctx.exist(projection_weights_index) ||
- _ctx.at(projection_weights_index).shape().rank() == 2) &&
- _ctx.at(output_state_in_index).shape().rank() == 2 &&
- _ctx.at(cell_state_in_index).shape().rank() == 2);
+ (_ctx.at(output_index).shape().rank() == 2 || _ctx.at(output_index).shape().rank() == 3) &&
+ (_ctx.at(input_index).shape().rank() == 2 || _ctx.at(input_index).shape().rank() == 3) &&
+ (!_ctx.exist(input_to_input_weights_index) ||
+ _ctx.at(input_to_input_weights_index).shape().rank() == 2) &&
+ _ctx.at(input_to_forget_weights_index).shape().rank() == 2 &&
+ _ctx.at(input_to_cell_weights_index).shape().rank() == 2 &&
+ _ctx.at(input_to_output_weights_index).shape().rank() == 2 &&
+ (!_ctx.exist(recurrent_to_input_weights_index) ||
+ _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2) &&
+ _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
+ (!_ctx.exist(projection_weights_index) ||
+ _ctx.at(projection_weights_index).shape().rank() == 2) &&
+ _ctx.at(output_state_in_index).shape().rank() == 2 &&
+ _ctx.at(cell_state_in_index).shape().rank() == 2);
OP_REQUIRES(
- (!_ctx.exist(cell_to_input_weights_index) ||
- _ctx.at(cell_to_input_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(cell_to_forget_weights_index) ||
- _ctx.at(cell_to_forget_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(cell_to_output_weights_index) ||
- _ctx.at(cell_to_output_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().rank() == 1) &&
- _ctx.at(forget_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(cell_bias_index).shape().rank() == 1 &&
- _ctx.at(output_gate_bias_index).shape().rank() == 1 &&
- (!_ctx.exist(projection_bias_index) || _ctx.at(projection_bias_index).shape().rank() == 1));
+ (!_ctx.exist(cell_to_input_weights_index) ||
+ _ctx.at(cell_to_input_weights_index).shape().rank() == 1) &&
+ (!_ctx.exist(cell_to_forget_weights_index) ||
+ _ctx.at(cell_to_forget_weights_index).shape().rank() == 1) &&
+ (!_ctx.exist(cell_to_output_weights_index) ||
+ _ctx.at(cell_to_output_weights_index).shape().rank() == 1) &&
+ (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().rank() == 1) &&
+ _ctx.at(forget_gate_bias_index).shape().rank() == 1 &&
+ _ctx.at(cell_bias_index).shape().rank() == 1 &&
+ _ctx.at(output_gate_bias_index).shape().rank() == 1 &&
+ (!_ctx.exist(projection_bias_index) || _ctx.at(projection_bias_index).shape().rank() == 1));
// CIFG assertion
OP_REQUIRES(
- ((!_ctx.exist(input_to_input_weights_index) ||
- (_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) == 0)) &&
- (!_ctx.exist(recurrent_to_input_weights_index) ||
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) &&
- (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().dim(0) == 0) &&
- (!_ctx.exist(cell_to_input_weights_index) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0)) ||
- ((_ctx.exist(input_to_input_weights_index) &&
- (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0)) &&
- (_ctx.exist(recurrent_to_input_weights_index) &&
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) &&
- (_ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0)));
+ ((!_ctx.exist(input_to_input_weights_index) ||
+ (_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 &&
+ _ctx.at(input_to_input_weights_index).shape().dim(1) == 0)) &&
+ (!_ctx.exist(recurrent_to_input_weights_index) ||
+ (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) &&
+ (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().dim(0) == 0) &&
+ (!_ctx.exist(cell_to_input_weights_index) ||
+ _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0)) ||
+ ((_ctx.exist(input_to_input_weights_index) &&
+ (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(input_to_input_weights_index).shape().dim(1) != 0)) &&
+ (_ctx.exist(recurrent_to_input_weights_index) &&
+ (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) &&
+ (_ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0)));
// Peephole assertion
OP_REQUIRES(((!_ctx.exist(cell_to_forget_weights_index) ||
(_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
_ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
bool has_recurrent_to_input_weights =
- _ctx.exist(recurrent_to_input_weights_index) &&
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
+ _ctx.exist(recurrent_to_input_weights_index) &&
+ (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
bool has_input_gate_bias =
- _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
+ _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
bool has_cell_to_input_weights = _ctx.exist(cell_to_input_weights_index) &&
_ctx.at(cell_to_input_weights_index).shape().dim(0) != 0;
bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
(_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
_ctx.at(projection_weights_index).shape().dim(1) != 0);
bool has_projection_bias =
- _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0) != 0;
+ _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0) != 0;
// NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG).
// true: no CIFG
bool has_projection_param = has_projection_weights;
const auto batch_size = (_ctx.at(input_index).shape().rank() == 3 && node.param().time_major)
- ? _ctx.at(input_index).shape().dim(1)
- : _ctx.at(input_index).shape().dim(0);
+ ? _ctx.at(input_index).shape().dim(1)
+ : _ctx.at(input_index).shape().dim(0);
OP_REQUIRES(batch_size == _ctx.at(output_state_in_index).shape().dim(0) &&
batch_size == _ctx.at(cell_state_in_index).shape().dim(0));
num_units == _ctx.at(cell_state_in_index).shape().dim(1));
const auto output_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
+ _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
OP_REQUIRES(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) &&
output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) &&
output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) &&
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)};
const auto num_lower_index{
- node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_LOWER_DIAG)};
+ node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_LOWER_DIAG)};
const auto num_upper_index{
- node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)};
+ node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)};
// Check for dimension constraints
if (_ctx.at(output_index).info().isDynamic())
namespace compiler
{
-bool StaticShapeInferer::infer(const ir::OpSequence &op_seq)
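+// Infer static shapes for all operations of the given subgraph in topological order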
+void StaticShapeInferer::inferSubgraph(ir::SubgraphIndex subg_ind)
+{
+ StaticShapeInferer inferer(subg_ind, _lowered_subgs);
+ auto &lgraph = _lowered_subgs.at(subg_ind);
+ for (auto op_ind : lgraph->graph().topolSortOperations())
+ {
+ auto &op = lgraph->graph().operations().at(op_ind);
+ bool has_dynamic_tensor = inferer.infer(op);
+ lgraph->setHasDynamicTensor(op_ind, has_dynamic_tensor);
+ }
+}
+
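+// Infer the output shapes of a single operation; returns true if any involved tensor is dynamic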
+bool StaticShapeInferer::infer(const ir::Operation &op)
{
bool has_dynamic_tensor = false;
- for (const auto &operation_idx : op_seq.operations())
- {
- auto &op = _operations.at(operation_idx);
- auto opcode = op.opcode();
+ auto opcode = op.opcode();
+
+ _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
- _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
+ // IF: need shape inference for then, else
+ // While: need shape inference for condition, body
+ if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
+ {
+ op.accept(*this);
+ }
+ else
+ {
+ _return_has_dynamic_tensor = checkDynamicInput(op);
- // IF: need shape inference for then, else
- // While: need shape inference for condition, body
- if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
+ if (_return_has_dynamic_tensor)
{
- op.accept(*this);
+ setDynamicOutput(op);
}
else
{
- _return_has_dynamic_tensor = checkDynamicInput(op);
-
- if (_return_has_dynamic_tensor)
- {
- setDynamicOutput(op);
- }
- else
- {
- op.accept(*this);
- }
+ op.accept(*this);
}
-
- has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
}
+ has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
+
return has_dynamic_tensor;
}
{
const auto index = pair.first;
const auto &lowered_subg = pair.second;
- VERBOSE(StaticShapeInferer) << "SubGraph #" << index.value() << std::endl;
+ VERBOSE(StaticShapeInferer) << index << std::endl;
lowered_subg->graph().operands().iterate(
- [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
- VERBOSE(StaticShapeInferer) << "Operand #" << ind.value() << ", "
- << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", "
- << get_shape_str(operand.info().shape()) << std::endl;
- });
+ [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ VERBOSE(StaticShapeInferer)
+ << " " << ind << ", " << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", "
+ << get_shape_str(operand.info().shape()) << std::endl;
+ });
}
}
// re-sizing output shape
ir::Shape new_shape =
- shape_inference::inferArgMinMaxShape(input.info().shape(), axis_value, rank);
+ shape_inference::inferArgMinMaxShape(input.info().shape(), axis_value, rank);
output.info().shape(new_shape);
}
const auto &input = _operands.at(input_idx);
const auto cluster_idx{
- op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
const auto &cluster = _operands.at(cluster_idx);
const auto output_idx = op.getOutputs().at(0);
// re-sizing output shape
ir::Shape new_shape = shape_inference::inferBCQFullyConnectedShape(
- input.info().shape(), cluster.info().shape(), cluster_buf);
+ input.info().shape(), cluster.info().shape(), cluster_buf);
output.info().shape(new_shape);
}
// re-sizing output shape
ir::Shape new_shape = shape_inference::inferBCQGatherShape(
- indices.info().shape(), cluster.info().shape(), cluster_buf, rank, op.param());
+ indices.info().shape(), cluster.info().shape(), cluster_buf, rank, op.param());
output.info().shape(new_shape);
}
// re-sizing output shape
ir::Shape new_shape =
- shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param());
+ shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param());
output.info().shape(new_shape);
}
assert(axis.data()->base());
int32_t axis_value =
- (axis_type == ir::DataType::INT32)
- ? reinterpret_cast<const int32_t *>(axis.data()->base())[0]
- : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis.data()->base())[0]);
+ (axis_type == ir::DataType::INT32)
+ ? reinterpret_cast<const int32_t *>(axis.data()->base())[0]
+ : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis.data()->base())[0]);
// re-sizing output shape
ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_value);
const auto &dims_shape = shape.info().shape();
auto new_shape = ((dims_type == ir::DataType::INT32)
- ? shape_inference::inferFillShape<int32_t>(
- dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
- : shape_inference::inferFillShape<int64_t>(
- dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
+ ? shape_inference::inferFillShape<int32_t>(
+ dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
+ : shape_inference::inferFillShape<int64_t>(
+ dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
output.info().shape(new_shape);
}
ir::Operand &output = _operands.at(output_idx);
// re-sizing output shape
ir::Shape new_shape =
- shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
+ shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
output.info().shape(new_shape);
}
// re-sizing output shape
ir::Shape new_shape =
- shape_inference::inferGatherShape(input.info().shape(), indices.info().shape(), axis, rank);
+ shape_inference::inferGatherShape(input.info().shape(), indices.info().shape(), axis, rank);
output.info().shape(new_shape);
}
}
}
- // re-sizing operands of then subgraph
- StaticShapeInferer then_inferer(op.param().then_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().then_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = then_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
-
- // re-sizing operands of else subgraph
- StaticShapeInferer else_inferer(op.param().else_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().else_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = else_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
+ inferSubgraph(op.param().then_subg_index);
+ inferSubgraph(op.param().else_subg_index);
// re-sizing output shapes
+ // TODO use then_graph / else_graph instead
const auto &then_outputs = _lowered_subgs.at(op.param().then_subg_index)->graph().getOutputs();
const auto &else_outputs = _lowered_subgs.at(op.param().else_subg_index)->graph().getOutputs();
assert(outputs.size() == then_outputs.size());
auto &output = _operands.at(output_index);
const auto output_state_out_index{
- op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
const auto cell_state_out_index{op.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- if (output.info().isDynamic() || (_operands.exist(output_state_out_index) &&
- _operands.at(output_state_out_index).info().isDynamic()) ||
+ if (output.info().isDynamic() ||
+ (_operands.exist(output_state_out_index) &&
+ _operands.at(output_state_out_index).info().isDynamic()) ||
(_operands.exist(cell_state_out_index) &&
_operands.at(cell_state_out_index).info().isDynamic()) ||
(_operands.exist(scratch_buffer_index) &&
const auto &input = _operands.at(input_index);
const auto input_to_output_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
const auto &input_to_output_weights = _operands.at(input_to_output_weights_index);
const auto recurrent_to_output_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
const auto &recurrent_to_output_weights = _operands.at(recurrent_to_output_weights_index);
// re-sizing outputs
auto &scratch_buffer = _operands.at(scratch_buffer_index);
const auto input_to_input_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
const auto recurrent_to_input_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
bool has_input_to_input_weights =
- _operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _operands.at(input_to_input_weights_index).shape().dim(1) != 0;
+ _operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ _operands.at(input_to_input_weights_index).shape().dim(1) != 0;
bool has_recurrent_to_input_weights =
- _operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ _operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
// NOTE The cell_to_input_weights do not exist in non-peephole mode, even for a regular (non-CIFG) LSTM.
// true: no CIFG
// re-sizing output shape
const auto new_shape = shape_inference::inferPadShape(
- input.shape(), reinterpret_cast<const int32_t *>(pad.data()->base()),
- pad.shape().num_elements());
+ input.shape(), reinterpret_cast<const int32_t *>(pad.data()->base()),
+ pad.shape().num_elements());
output.info().shape(new_shape);
}
if (output.typeInfo().type() == ir::DataType::FLOAT32)
{
new_shape = shape_inference::inferRangeShape<float>(
- start_op.asScalar<float>(), limit_op.asScalar<float>(), delta_op.asScalar<float>());
+ start_op.asScalar<float>(), limit_op.asScalar<float>(), delta_op.asScalar<float>());
}
else if (output.typeInfo().type() == ir::DataType::INT32)
{
new_shape = shape_inference::inferRangeShape<int32_t>(
- start_op.asScalar<int32_t>(), limit_op.asScalar<int32_t>(), delta_op.asScalar<int32_t>());
+ start_op.asScalar<int32_t>(), limit_op.asScalar<int32_t>(), delta_op.asScalar<int32_t>());
}
assert(output.shape() == new_shape);
}
// re-sizing output shape
ir::Shape new_shape =
- shape_inference::inferReduceShape(input.info().shape(), axes_vec, keep_dims);
+ shape_inference::inferReduceShape(input.info().shape(), axes_vec, keep_dims);
output.info().shape(new_shape);
}
assert(shape_buf);
ir::Shape new_shape = shape_inference::inferReshapeShape(
- shape_buf, shape.shape().num_elements(), input.shape().num_elements());
+ shape_buf, shape.shape().num_elements(), input.shape().num_elements());
// If shape comes from a Const, TFLC puts the output shape into the tensor
if (new_shape != output.shape())
{
// Let's check the new_shape option
auto shape = op.param().new_shape;
- ir::Shape new_shape = shape_inference::inferReshapeShape(shape.data(), shape.size(),
- input.shape().num_elements());
+ ir::Shape new_shape =
+ shape_inference::inferReshapeShape(shape.data(), shape.size(), input.shape().num_elements());
if (new_shape != output.shape())
{
// Shape inferencing logic based on Params
ir::Shape new_shape =
- shape_inference::inferResizeBilinearShape(input.shape(), height_out, width_out);
+ shape_inference::inferResizeBilinearShape(input.shape(), height_out, width_out);
// If size_op comes from a Const, TFLC puts the output shape into the tensor
if (new_shape != output.shape())
// Select output shape
ir::Shape new_shape = shape_inference::inferSelectShape(
- input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
+ input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
output.info().shape(new_shape);
}
return;
}
- auto begins_buf = reinterpret_cast<const int32_t *>(begins.data()->base());
- auto sizes_buf = reinterpret_cast<const int32_t *>(sizes.data()->base());
+ auto begins_buf = begins.data()->base();
+ auto sizes_buf = sizes.data()->base();
+
+ const auto begins_type = begins.typeInfo().type();
+ assert(begins_type == ir::DataType::INT32 || begins_type == ir::DataType::INT64);
+ assert(begins_type == sizes.typeInfo().type());
ir::Shape new_shape =
- shape_inference::inferSliceShape(input.info().shape(), begins_buf, sizes_buf);
+ (begins_type == ir::DataType::INT32)
+ ? shape_inference::inferSliceShape<int32_t>(input.info().shape(),
+ reinterpret_cast<const int32_t *>(begins_buf),
+ reinterpret_cast<const int32_t *>(sizes_buf))
+ : shape_inference::inferSliceShape<int64_t>(input.info().shape(),
+ reinterpret_cast<const int64_t *>(begins_buf),
+ reinterpret_cast<const int64_t *>(sizes_buf));
output.info().shape(new_shape);
}
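The rewritten Slice branch above dispatches on the element type of the begins/sizes operands instead of assuming INT32. Below is a minimal standalone sketch of that dtype-dispatch idea in plain C++; the type and function names are illustrative only and are not onert API.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

enum class DType { INT32, INT64 };

// Read a raw index buffer as either int32 or int64 and normalize to int64.
template <typename T> std::vector<int64_t> readAs(const void *buf, std::size_t count)
{
  const T *typed = reinterpret_cast<const T *>(buf);
  return std::vector<int64_t>(typed, typed + count);
}

std::vector<int64_t> readIndexBuffer(const void *buf, std::size_t count, DType dtype)
{
  return (dtype == DType::INT32) ? readAs<int32_t>(buf, count) : readAs<int64_t>(buf, count);
}

int main()
{
  int32_t begins[] = {0, 1, 2};
  for (auto v : readIndexBuffer(begins, 3, DType::INT32))
    std::cout << v << " "; // prints: 0 1 2
  std::cout << std::endl;
}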
auto padding_data = reinterpret_cast<const int32_t *>(padding.data()->base());
ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
- input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
+ input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
output.info().shape(new_shape);
}
assert(0 <= axis_value && axis_value < rank);
ir::Shape new_shape =
- shape_inference::inferSplitShape(input.info().shape(), axis_value, num_splits);
+ shape_inference::inferSplitShape(input.info().shape(), axis_value, num_splits);
for (auto output_idx : outputs)
{
ir::Operand &output = _operands.at(output_idx);
auto strides_buf = reinterpret_cast<const uint32_t *>(strides.data()->base());
auto op_params = shape_inference::buildStridedSliceParams(
- starts_buf, ends_buf, strides_buf, begin_mask, end_mask, shrink_axis_mask, rank);
+ starts_buf, ends_buf, strides_buf, begin_mask, end_mask, shrink_axis_mask, rank);
ir::Shape new_shape =
- shape_inference::inferStridedSliceShape(input.info().shape(), op_params, rank);
+ shape_inference::inferStridedSliceShape(input.info().shape(), op_params, rank);
output.info().shape(new_shape);
}
}
// re-sizing operands of body subgraph
- StaticShapeInferer body_inferer(op.param().body_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().body_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = body_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
+ inferSubgraph(op.param().body_subg_index);
// Check whether while operation's shapes are predictable
// If any shapes of body outputs and cond inputs differ, non-constant operands would be
}
// Set non-constant operands of body subgraph to dynamic
- StaticShapeInferer body_inferer(op.param().body_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().body_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = body_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
+ inferSubgraph(op.param().body_subg_index);
}
// re-sizing operands of cond subgraph
// If check_unpredictable_dynamic is true, non-constant operands of cond subgraph would be set to
// dynamic
- StaticShapeInferer cond_inferer(op.param().cond_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().cond_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = cond_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
+ inferSubgraph(op.param().cond_subg_index);
// re-sizing outputs of while operation
// If check_unpredictable_dynamic is true, outputs of while operation would be set to dynamic
#include <memory>
#include "backend/BackendContext.h"
#include "backend/Backend.h"
-#include "backend/controlflow/Config.h"
-#include "backend/controlflow/TensorBuilder.h"
-#include "backend/controlflow/TensorRegistry.h"
+#include "backend/builtin/Config.h"
+#include "backend/builtin/TensorBuilder.h"
+#include "backend/builtin/TensorRegistry.h"
namespace onert
{
public:
TensorRegistries() = default;
- TensorRegistries(const onert::backend::BackendContexts &backend_contexts,
- bool include_controlflow)
+ TensorRegistries(const onert::backend::BackendContexts &backend_contexts, bool include_builtin)
{
for (const auto &e : backend_contexts)
{
auto tensor_reg = e.second->tensor_registry;
- if (e.first->config()->id() == backend::controlflow::Config::ID)
+ if (e.first->config()->id() == backend::builtin::Config::ID)
{
- _cf_tensor_reg =
- std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(tensor_reg);
- if (include_controlflow)
+ _builtin_tensor_reg =
+ std::dynamic_pointer_cast<backend::builtin::TensorRegistry>(tensor_reg);
+ if (include_builtin)
_tensor_regs.insert(tensor_reg);
}
else
return _tensor_regs.cend();
}
- std::shared_ptr<backend::controlflow::TensorRegistry> getControlflowTensorRegistry() const
+ std::shared_ptr<backend::builtin::TensorRegistry> getBuiltinTensorRegistry() const
{
- return _cf_tensor_reg;
+ return _builtin_tensor_reg;
}
backend::ITensor *getITensor(ir::OperandIndex ind) const
private:
std::unordered_set<std::shared_ptr<backend::ITensorRegistry>> _tensor_regs;
- std::shared_ptr<backend::controlflow::TensorRegistry> _cf_tensor_reg;
+ std::shared_ptr<backend::builtin::TensorRegistry> _builtin_tensor_reg;
};
} // namespace compiler
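For context, an aggregated registry like the one above is typically queried by probing each backend registry in turn until one owns the operand. The following is a standalone sketch of that lookup pattern with illustrative types only; it is an assumption about the shape of getITensor(), not the actual onert implementation.

#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>

struct Tensor
{
  std::string name;
};

struct Registry
{
  std::unordered_map<int, Tensor> tensors;
  Tensor *find(int ind)
  {
    auto it = tensors.find(ind);
    return (it == tensors.end()) ? nullptr : &it->second;
  }
};

int main()
{
  auto cpu_reg = std::make_shared<Registry>();
  cpu_reg->tensors[0] = Tensor{"cpu_tensor_0"};
  auto builtin_reg = std::make_shared<Registry>();
  builtin_reg->tensors[1] = Tensor{"builtin_tensor_1"};

  // Probe every backend registry until one owns the operand index.
  std::unordered_set<std::shared_ptr<Registry>> regs{cpu_reg, builtin_reg};
  for (int ind : {0, 1})
    for (const auto &reg : regs)
      if (auto *t = reg->find(ind))
      {
        std::cout << "Operand " << ind << " -> " << t->name << std::endl;
        break;
      }
}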
#include "ConstantInsertionPass.h"
#include "backend/Backend.h"
-#include <ir/Graph.h>
-#include <util/Utils.h>
+#include "ir/Graph.h"
+#include "util/Utils.h"
+#include "util/logging.h"
namespace onert
{
void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
{
- const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
- const auto backend = op_seq_lower_info->backend();
- const auto layout = op_seq_lower_info->layout();
- const auto factor = ir::operand::PermuteFactor{backend, layout};
+ const auto op_lower_info = _lowered_graph.lower_info().operation.getRawPtr(node_index);
+ const auto backend = op_lower_info->backend();
+ const auto layout = op_lower_info->layout();
+ const auto factor = PermuteFactor{backend, layout};
for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
if (_replace_operands_map.count(key) == 0)
{
ir::Operand new_object(object);
- new_object.unsetDef();
- // TODO Remove const_case
- const_cast<ir::OperationIndexSet &>(new_object.getUses()).clear();
+ new_object.clearDefUse();
const auto new_index = _graph.operands().emplace(new_object);
_replace_operands_map[key] = new_index;
}
const auto replaced_input = _replace_operands_map[key];
- // Update op_seq
- if (_lowered_graph.op_seqs().at(op_sequence_index).getInputs().contains(input))
- {
- // All inputs of op_seq have the same PermuteFactor because those inputs are inputs of first
- // operation
- _lowered_graph.op_seqs().at(op_sequence_index).replaceInputs(input, replaced_input);
- }
// Update the same inputs of a node at once because inputs of an operation have the same
// PermuteFactor
auto &replaced_object = _graph.operands().at(replaced_input);
replaced_object.insertUse(node_index);
+ VERBOSE(ConstInsertPass) << "New operand " << replaced_input << " added(copy of " << input
+ << ") for " << factor << std::endl;
// Remove this node from uses of origin operand
// Constant operand has no def.
assert(!object.getDef().valid());
// Remove origin operand
if (object.getUses().size() == 0)
+ {
_graph.removeOperand(input);
+ VERBOSE(ConstInsertPass) << "Original operand " << input << " removed - no uses"
+ << std::endl;
+ }
}
}
#ifndef __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
#define __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
-#include <ir/operand/PermuteFactor.h>
+#include <compiler/PermuteFactor.h>
#include <ir/Index.h>
#include "LoweredOperationPass.h"
#include <unordered_map>
struct ReplaceKey
{
ir::OperandIndex index;
- ir::operand::PermuteFactor factor;
+ PermuteFactor factor;
bool operator==(const ReplaceKey &other) const
{
std::size_t operator()(const ReplaceKey &key) const noexcept
{
using std::hash;
- return hash<ir::OperandIndex>()(key.index) ^
- (hash<ir::operand::PermuteFactor>()(key.factor) << 1);
+ return hash<ir::OperandIndex>()(key.index) ^ (hash<PermuteFactor>()(key.factor) << 1);
}
};
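The ReplaceKey hasher above combines the operand-index hash with a shifted PermuteFactor hash. Here is a self-contained sketch of that XOR-and-shift combine pattern; the member types are illustrative stand-ins.

#include <cstddef>
#include <functional>
#include <iostream>
#include <string>

struct Key
{
  int index;
  std::string factor;
  bool operator==(const Key &other) const { return index == other.index && factor == other.factor; }
};

struct KeyHash
{
  std::size_t operator()(const Key &key) const noexcept
  {
    // Shifting the second hash before XOR reduces collisions when both members hash equally.
    return std::hash<int>()(key.index) ^ (std::hash<std::string>()(key.factor) << 1);
  }
};

int main()
{
  std::cout << KeyHash{}(Key{3, "cpu/NHWC"}) << std::endl;
}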
#include "backend/Backend.h"
#include <ir/Graph.h>
-#include <ir/operand/PermuteFactor.h>
+#include <compiler/PermuteFactor.h>
#include <util/Utils.h>
+#include "util/logging.h"
namespace onert
{
void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
{
- const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
- const auto backend = op_seq_lower_info->backend();
- const auto layout = op_seq_lower_info->layout();
- const auto factor = ir::operand::PermuteFactor{backend, layout};
+ const auto op_lower_info = _lowered_graph.lower_info().operation.getRawPtr(node_index);
+ const auto backend = op_lower_info->backend();
+ const auto layout = op_lower_info->layout();
+ const auto factor = PermuteFactor{backend, layout};
// Currently this runtime does not support a node that makes an operation output a constant
for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
// All constant operands are already assigned to each backend by ConstantInsertionPass. So a
// constant has `def` and `use` as the same PermuteFactor
- _lowered_graph.setLowerInfo(input, std::make_unique<ir::operand::LowerInfo>());
- _lowered_graph.getLowerInfo(input)->addDefPermuteFactor(factor);
- _lowered_graph.getLowerInfo(input)->addUsePermuteFactor(factor);
+ auto operand_li = std::make_unique<compiler::OperandLowerInfo>();
+ operand_li->addDefPermuteFactor(factor);
+ operand_li->addUsePermuteFactor(factor);
+ _lowered_graph.lower_info().operand.set(input, std::move(operand_li));
}
}
}
{
public:
LoweredOperandPass(compiler::LoweredGraph &lowered_graph)
- : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
+ : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
{
// DO NOTHING
}
{
public:
LoweredOperationPass(LoweredGraph &lowered_graph)
- : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
+ : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
{
// DO NOTHING
}
void OperandPass::run()
{
_graph.operands().iterate(
- [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); });
+ [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); });
}
} // namespace pass
void OperationPass::run()
{
_graph.operations().iterate(
- [&](const ir::OperationIndex &index, ir::Operation &node) { callback(index, node); });
+ [&](const ir::OperationIndex &index, ir::Operation &node) { callback(index, node); });
}
} // namespace pass
namespace ir
{
class Graph;
-} // namespace compiler
+} // namespace ir
} // namespace onert
namespace onert
VERBOSE(PassRunner) << "Start running '" << pass->id() << "'" << std::endl;
pass->run();
VERBOSE(PassRunner) << "Finished running '" << pass->id() << "'" << std::endl;
- // TODO Dump graph(LowerInfo, OpSequence, ...)?
+ // TODO Dump graph?
}
}
*/
#include "PermutationEliminationPass.h"
-#include "backend/controlflow/Config.h"
+#include "backend/builtin/Config.h"
#include "util/logging.h"
// Check if two tensors are both portable; if not, we can't eliminate the node
{
- auto in_def_factor = _lowered_graph.getLowerInfo(in_operand)->def_factors().getOnlyElement();
- auto out_def_factor = _lowered_graph.getLowerInfo(out_operand)->def_factors().getOnlyElement();
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+ auto in_def_factor = operand_li_map.getRawPtr(in_operand)->def_factors().getOnlyElement();
+ auto out_def_factor = operand_li_map.getRawPtr(out_operand)->def_factors().getOnlyElement();
auto in_config = in_def_factor.backend()->config();
auto out_config = out_def_factor.backend()->config();
auto &out_operand_obj = _graph.operands().at(out_operand);
assert(out_operand_obj.getDef() == _op_ind);
out_operand_obj.unsetDef();
- _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- if (!op_seq.getOutputs().contains(in_operand))
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::Operation &op) {
+ if (!op.getOutputs().contains(in_operand))
return;
-
- // Update OpSequence/ir::Operation edges and ir::Operand edges
- op_seq.replaceOutputs(in_operand, out_operand);
- for (auto op : op_seq.operations())
- {
- auto &operation_obj = _graph.operations().at(op);
- if (operation_obj.getOutputs().contains(in_operand))
- {
- operation_obj.replaceOutputs(in_operand, out_operand);
- out_operand_obj.setDef(op);
- }
- }
+ // Update Operation and Operand edges
+ op.replaceOutputs(in_operand, out_operand);
+ out_operand_obj.setDef(op_ind);
});
- // Remove Permute operation, enclosing OpSequence and the operand
+ // Remove Permute operation and the operand
{
_graph.removeOperand(in_operand);
-
- auto op_seq_ind = _lowered_graph.op_seqs().getOperation(_op_ind);
- // Assumes enclosing OpSequence contatins just this Permute operation
- assert(_lowered_graph.op_seqs().at(op_seq_ind).size() == 1);
- _lowered_graph.op_seqs().remove(op_seq_ind);
_graph.operations().remove(_op_ind);
}
- _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- if (!op_seq.getInputs().contains(in_operand))
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::Operation &op) {
+ if (!op.getInputs().contains(in_operand))
return;
-
- op_seq.replaceInputs(in_operand, out_operand);
- for (auto op : op_seq.operations())
- {
- auto &operation_obj = _graph.operations().at(op);
- if (operation_obj.getInputs().contains(in_operand))
- {
- operation_obj.replaceInputs(in_operand, out_operand);
- out_operand_obj.insertUse(op);
- }
- }
+ op.replaceInputs(in_operand, out_operand);
+ out_operand_obj.insertUse(op_ind);
});
VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
- VERBOSE(removePermute) << " - Input (removed) ir::Operand : " << in_operand << std::endl;
- VERBOSE(removePermute) << " - Output(kept) ir::Operand : " << out_operand << std::endl;
+ VERBOSE(removePermute) << " - Input (removed) Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output(kept) Operand : " << out_operand << std::endl;
}
else
{
auto &in_operand_obj = _graph.operands().at(in_operand);
in_operand_obj.removeUse(_op_ind);
- // Make OpSequences(that use the output) use the input
- _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- if (!op_seq.getInputs().contains(out_operand))
+ // Make operations(that use the output) use the input
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::Operation &op) {
+ if (!op.getInputs().contains(out_operand))
return;
-
- op_seq.replaceInputs(out_operand, in_operand);
- for (auto op : op_seq.operations())
- {
- auto &operation_obj = _graph.operations().at(op);
- if (operation_obj.getInputs().contains(out_operand))
- {
- operation_obj.replaceInputs(out_operand, in_operand);
- in_operand_obj.insertUse(op);
- }
- }
+ op.replaceInputs(out_operand, in_operand);
+ in_operand_obj.insertUse(op_ind);
});
- // Remove Permute operation, enclosing OpSequence and the operand
+ // Remove the Permute operation and out_operand
{
_graph.removeOperand(out_operand);
-
- auto op_seq_ind = _lowered_graph.op_seqs().getOperation(_op_ind);
- // Assumes enclosing OpSequence contatins just this Permute operation
- assert(_lowered_graph.op_seqs().at(op_seq_ind).size() == 1);
- _lowered_graph.op_seqs().remove(op_seq_ind);
_graph.operations().remove(_op_ind);
}
- VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
- VERBOSE(removePermute) << " - Input (kept) ir::Operand : " << in_operand << std::endl;
- VERBOSE(removePermute) << " - Output(removed) ir::Operand : " << out_operand << std::endl;
+ VERBOSE(removePermute) << "Permute Op removed : " << _op_ind << std::endl;
+ VERBOSE(removePermute) << " - Input (kept) Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output(removed) Operand : " << out_operand << std::endl;
}
}
* are compatible and layouts match.
*
* Permute input tensor is kept and the output is removed for all the cases, except model outputs.
- * As all output tensors have to be controlflow backend, so the output is kept.
+ * Since all model output tensors have to be on the builtin backend, the output is kept.
*
* @note This is an optimization pass which means that everything should work fine even if this pass
* was skipped.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
#include <utility>
#include <unordered_map>
-#include "backend/controlflow/Config.h"
+#include "backend/builtin/Config.h"
#include "ir/Operand.h"
-#include "ir/operation/LowerInfo.h"
+#include "compiler/OperationLowerInfo.h"
#include "ir/Graph.h"
#include "backend/IConfig.h"
#include "util/logging.h"
void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Operand &object)
{
- auto &&operand_li = _lowered_graph.getLowerInfo(index);
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+ auto &&operand_li = operand_li_map.getRawPtr(index);
assert(operand_li);
// NOTE Later, constants also will have Def
std::list<ir::OperationIndex> permute_indexes;
// Build a map for all necessary type of operands
- std::unordered_map<ir::operand::PermuteFactor, ir::OperandIndex> factor_to_index;
+ std::unordered_map<PermuteFactor, ir::OperandIndex> factor_to_index;
{
assert(operand_li->def_factors().size() == 1);
for (auto factor : operand_li->def_factors())
continue;
auto &operation = _graph.operations().at(use);
- assert(_lowered_graph.op_seqs().containsOperation(use));
- auto op_seq_index = _lowered_graph.op_seqs().getOperation(use);
- auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
- assert(op_seq_li);
- const auto op_seq_layout = op_seq_li->layout();
- const backend::Backend *backend = op_seq_li->backend();
+ auto op_li = _lowered_graph.lower_info().operation.getRawPtr(use);
+ assert(op_li);
+ const auto op_layout = op_li->layout();
+ const backend::Backend *backend = op_li->backend();
assert(backend);
auto use_node_inputs = operation.getInputs();
assert(use_node_inputs.contains(index));
- auto new_index = factor_to_index.at({backend, op_seq_layout});
+ auto new_index = factor_to_index.at({backend, op_layout});
if (index != new_index)
{
- // Update from op_seq
- // Replace the same inputs of an OpSequence at once for the following reasons:
- // 1. An OpSequence's inputs are the same inputs of first operation
- // 2. An OpSequence may have inputs as the same operand (2 or more).
- // 3. The same inputs of OpSequence have the same PermuteFactor.
- _lowered_graph.op_seqs().at(op_seq_index).replaceInputs(index, new_index);
-
// Update from operation
// Replace the same inputs of an operation at once for the following reasons:
// No. 2 and 3 above
// Update from operand
remove_list.push_back(
- use); // Removal should be done in another loop since we are in the loop
+ use); // Removal should be done in another loop since we are in the loop
_graph.operands().at(new_index).insertUse(use);
}
}
}
ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandIndex &operand_index,
- const ir::operand::PermuteFactor &factor)
+ const PermuteFactor &factor)
{
- assert(!_graph.isBuildingPhase());
-
auto &operand = _graph.operands().at(operand_index);
// Generate output operand and permute operation
auto out_operand_index = _graph.addOperand(operand.shape(), operand.typeInfo());
- // change model output if operand_index is model output index and the out operand is controlflow
+ // change model output if operand_index is model output index and the out operand is builtin
// backend
auto &model_outputs = _graph.getOutputs();
- const backend::Backend *cf_backend = compiler::BackendManager::get().getControlflow();
- if (model_outputs.contains(operand_index) && factor.backend() == cf_backend)
+ const backend::Backend *builtin_backend = compiler::BackendManager::get().getBuiltin();
+ if (model_outputs.contains(operand_index) && factor.backend() == builtin_backend)
{
model_outputs.replace(operand_index, out_operand_index);
}
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+
// Find Permute information
- auto input_factor = _lowered_graph.getLowerInfo(operand_index)->def_factors().getOnlyElement();
+ auto input_factor = operand_li_map.getRawPtr(operand_index)->def_factors().getOnlyElement();
auto input_backend = input_factor.backend();
auto output_backend = factor.backend();
// NOTE Permute may not have specific layout because the layout of input and output may be
// different.
const auto permute_node_layout = ir::Layout::UNKNOWN;
// NOTE If one backend supports several layout, the backend must support Permute operation
- const backend::Backend *permute_node_backend = compiler::BackendManager::get().getControlflow();
+ const backend::Backend *permute_node_backend = compiler::BackendManager::get().getBuiltin();
if (input_backend == output_backend)
{
permute_node_backend = input_backend;
}
- const ir::operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
+ const PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
// Update LowerInfo of input operand
- auto operand_lower_info = _lowered_graph.getLowerInfo(operand_index);
+ auto operand_lower_info = operand_li_map.getRawPtr(operand_index);
operand_lower_info->removeUsePermuteFactor(factor);
operand_lower_info->addUsePermuteFactor(permute_node_factor);
// Update LowerInfo of output operand
- auto out_operand_li = std::make_unique<ir::operand::LowerInfo>();
+ auto out_operand_li = std::make_unique<compiler::OperandLowerInfo>();
// The input and output factors of all nodes will be the same except Permute. So Tensor's
// allocators allocate memory using only the information of the def permutation factor for now.
// TODO Change param to permute_node_factor
out_operand_li->addDefPermuteFactor(factor);
out_operand_li->addUsePermuteFactor(factor);
- _lowered_graph.setLowerInfo(out_operand_index, std::move(out_operand_li));
+ operand_li_map.set(out_operand_index, std::move(out_operand_li));
// Insert permute operation to the graph
const auto input_layout = input_factor.layout();
auto insert_node = std::make_unique<Permute>(operand_index, out_operand_index, permute_type);
auto node_index = _graph.operations().push(std::move(insert_node));
- const auto &node = _graph.operations().at(node_index);
VERBOSE_F() << "Permute Op inserted, node index : " << node_index << std::endl;
VERBOSE_F() << " - Input (original) Operand : " << operand_index << "("
VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << "("
<< factor.backend()->config()->id() << ")" << std::endl;
- // OpSequence
+ // Operation LowerInfo
{
- auto op_seq_index = _lowered_graph.op_seqs().emplace(node_index, permute_node_layout);
- auto &op_seq = _lowered_graph.op_seqs().at(op_seq_index);
- op_seq.setInputs(node.getInputs());
- op_seq.setOutputs(node.getOutputs());
- _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<ir::operation::LowerInfo>(
- permute_node_backend, permute_node_layout));
+ auto &operation_li_map = _lowered_graph.lower_info().operation;
+ operation_li_map.set(node_index, std::make_unique<compiler::OperationLowerInfo>(
+ permute_node_backend, permute_node_layout));
}
// Update Use/Def info
#include "LoweredOperandPass.h"
#include "compiler/BackendManager.h"
#include "ir/Operand.h"
-#include "ir/operand/PermuteFactor.h"
+#include "compiler/PermuteFactor.h"
namespace onert
{
* @return ir::OperationIndex
*/
ir::OperationIndex insertPermute(const ir::OperandIndex &operand_index,
- const ir::operand::PermuteFactor &factor);
+ const PermuteFactor &factor);
};
} // namespace pass
void PermutationOperationPass::callback(const OperationIndex &, Operation &node)
{
node.accept(*this);
-};
+}
// TODO Remove this. Expanding ranks of Operand is dangerous
void PermutationOperationPass::applyExpandRanks(const Operation &node)
assert(output.getDef().valid());
const auto node_index = output.getDef();
- const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout();
- const auto backend_layout = _lowered_graph.getLowerInfo(op_seq_index)->layout();
+ const auto frontend_layout = _graph.layout();
+ const auto backend_layout = _lowered_graph.lower_info().operation.getRawPtr(node_index)->layout();
if (frontend_layout == backend_layout)
{
assert(output_obj.getDef().valid());
const auto node_index = output_obj.getDef();
- const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout();
- const auto backend_layout = _lowered_graph.getLowerInfo(op_seq_index)->layout();
+ auto &operation_li_map = _lowered_graph.lower_info().operation;
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+ const auto frontend_layout = _graph.layout();
+ const auto backend_layout = operation_li_map.getRawPtr(node_index)->layout();
if (frontend_layout == backend_layout)
{
// Permutation changing layout beyond 4-D is not supported yet
assert(output_obj.shape().rank() <= 4);
- // Divide op_seq based on target operation
+ // Change PermuteFactors of operands and the operation of target node
{
- auto &prev_op_seq = _lowered_graph.op_seqs().at(op_seq_index);
- auto &operations = _lowered_graph.graph().operations();
-
- // Create new op_seq and move information from existing op_seq to new op_seq if target
- // node is the end of op_seq
- auto it = prev_op_seq.begin();
- // Find iterator of target node in op_seq
- while (*(it++) != node_index)
- ;
- if (it != prev_op_seq.end())
- {
- const auto &target_op_idx = *it;
- const auto &target_node = operations.at(target_op_idx);
- const auto &next_op_seq_index =
- _lowered_graph.op_seqs().emplace(target_op_idx, prev_op_seq.getLayout());
- auto &next_op_seq = _lowered_graph.op_seqs().at(next_op_seq_index);
- next_op_seq.setInputs(target_node.getInputs());
- next_op_seq.setOutputs(target_node.getOutputs());
-
- std::vector<OperationIndex> remove_list;
- remove_list.emplace_back(target_op_idx);
- while (++it != prev_op_seq.end())
- {
- next_op_seq.appendOperation(target_op_idx);
- next_op_seq.setOutputs(target_node.getOutputs());
- remove_list.emplace_back(target_op_idx);
- }
+ const auto op_li = operation_li_map.getRawPtr(node_index);
+ const auto backend = op_li->backend();
- prev_op_seq.setOutputs(node.getOutputs());
- for (const auto &index : remove_list)
- {
- prev_op_seq.remove(index);
- }
-
- const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
- _lowered_graph.setLowerInfo(
- next_op_seq_index,
- std::make_unique<ir::operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout()));
- }
- }
-
- // Remove target operation from op_seq and insert the target operation to new op_seq
- {
- const auto backend = _lowered_graph.getLowerInfo(op_seq_index)->backend();
+ operation_li_map.set(node_index,
+ std::make_unique<compiler::OperationLowerInfo>(backend, frontend_layout));
- // Remove target operation from op_sequence
- _lowered_graph.op_seqs().removeFromOpSequence(node_index);
-
- if (!_lowered_graph.op_seqs().exist(op_seq_index))
- {
- // Remove lowerinfo for op_seq of target operation if the op_seq does not exist
- _lowered_graph.removeLowerInfo(op_seq_index);
- }
- else
- {
- // Update op_seq of target operation if the op_seq exists
- auto &prev_op_seq = _lowered_graph.op_seqs().at(op_seq_index);
- const auto &last_node_idx = *(--prev_op_seq.end());
- const auto &last_node = _lowered_graph.graph().operations().at(last_node_idx);
- prev_op_seq.setOutputs(last_node.getOutputs());
- }
-
- // Create new op_seq and set information to the op_seq
- auto new_op_seq_index = _lowered_graph.op_seqs().emplace(node_index, frontend_layout);
- auto &new_op_seq = _lowered_graph.op_seqs().at(new_op_seq_index);
- new_op_seq.setInputs(node.getInputs());
- new_op_seq.setOutputs(node.getOutputs());
- _lowered_graph.setLowerInfo(
- new_op_seq_index, std::make_unique<ir::operation::LowerInfo>(backend, frontend_layout));
- }
-
- // Change PermuteFactors of operands of target node
- {
- const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
- const auto backend = op_seq_li->backend();
- const operand::PermuteFactor removed_factor{backend, backend_layout};
- const operand::PermuteFactor new_factor{backend, frontend_layout};
+ const PermuteFactor removed_factor{backend, backend_layout};
+ const PermuteFactor new_factor{backend, frontend_layout};
for (const auto &input : node.getInputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
+ // The old factor can be removed only if no other operation that uses this operand
+ // runs with the same backend and layout
bool canRemove = true;
for (const auto &use : _graph.operands().at(input).getUses())
{
if (use != node_index)
{
- const auto &use_op_seq_index = _lowered_graph.op_seqs().getOperation(use);
- auto use_op_seq_li = _lowered_graph.getLowerInfo(use_op_seq_index);
- if (use_op_seq_li->backend() == backend && use_op_seq_li->layout() == backend_layout)
+ auto use_op_li = operation_li_map.getRawPtr(use);
+ if (use_op_li->backend() == backend && use_op_li->layout() == backend_layout)
{
canRemove = false;
break;
}
}
- auto lower_info = _lowered_graph.getLowerInfo(input);
+ auto input_li = operand_li_map.getRawPtr(input);
if (canRemove)
{
- lower_info->removeUsePermuteFactor(removed_factor);
+ input_li->removeUsePermuteFactor(removed_factor);
}
- lower_info->addUsePermuteFactor(new_factor);
+ input_li->addUsePermuteFactor(new_factor);
// Check whether the node's input is a model input or a constant
if (!_graph.operands().at(input).getDef().valid() &&
- (lower_info->def_factors().size() == 1 &&
- lower_info->def_factors().getOnlyElement() == removed_factor))
+ (input_li->def_factors().size() == 1 &&
+ input_li->def_factors().getOnlyElement() == removed_factor))
{
assert(_graph.getInputs().contains(input) || _graph.operands().at(input).isConstant());
- lower_info->removeDefPermuteFactor(removed_factor);
- lower_info->addDefPermuteFactor(new_factor);
+ input_li->removeDefPermuteFactor(removed_factor);
+ input_li->addDefPermuteFactor(new_factor);
}
}
for (const auto &output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
- auto lower_info = _lowered_graph.getLowerInfo(output);
+ auto lower_info = operand_li_map.getRawPtr(output);
lower_info->removeDefPermuteFactor(removed_factor);
lower_info->addDefPermuteFactor(new_factor);
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pass.h"
+
+#include "UnusedOperandEliminationPass.h"
+#include "ir/Index.h"
+#include "util/Set.h"
+#include "ir/Graph.h"
+
+/**
+ * @file UnusedOperandEliminationPass.cc
+ * @brief This file contains UnusedOperandEliminationPass class implementation
+ */
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+void UnusedOperandEliminationPass::run()
+{
+ util::Set<ir::OperandIndex> used;
+
+ _graph.operations().iterate([&](const ir::OperationIndex &, const ir::Operation &node) {
+ for (auto ind : (node.getInputs() + node.getOutputs()) | ir::Remove::UNDEFINED)
+ {
+ used.add(ind);
+ }
+ });
+
+ // Graph's inputs/outputs are always considered as used
+ for (auto ind : (_graph.getInputs() + _graph.getOutputs()) | ir::Remove::UNDEFINED)
+ {
+ used.add(ind);
+ }
+
+ _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (!used.contains(ind))
+ {
+ VERBOSE() << "Remove unused operand " << ind << std::endl;
+ _graph.operands().remove(ind);
+ }
+ });
+}
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file UnusedOperandEliminationPass.h
+ * @brief This file contains UnusedOperandEliminationPass class
+ */
+
+#ifndef __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__
+#define __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__
+
+#include "Pass.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+/**
+ * @brief A pass to eliminate unused operands from the graph
+ *
+ * Remove operands that are not used by any operations, except Graph inputs/outputs.
+ *
+ */
+class UnusedOperandEliminationPass : public Pass
+{
+public:
+ using Pass::Pass;
+
+public:
+ std::string id() override { return "UnusedOperandEliminationPass"; }
+ void run() final;
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__
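The pass introduced above is a straightforward mark-and-sweep over operand indices: mark everything referenced by an operation or by the graph I/O, then remove the rest. A standalone sketch of the same idea with plain STL containers (small integer indices standing in for ir::OperandIndex):

#include <iostream>
#include <set>
#include <vector>

struct Op
{
  std::vector<int> inputs, outputs;
};

int main()
{
  std::vector<Op> ops = {{{0, 1}, {2}}, {{2}, {3}}};
  std::set<int> all_operands = {0, 1, 2, 3, 4, 5}; // 4 and 5 are referenced by nothing
  std::vector<int> graph_io = {0, 3};

  // Mark: graph inputs/outputs plus every operand an operation touches.
  std::set<int> used(graph_io.begin(), graph_io.end());
  for (const auto &op : ops)
  {
    used.insert(op.inputs.begin(), op.inputs.end());
    used.insert(op.outputs.begin(), op.outputs.end());
  }

  // Sweep: whatever was never marked can be removed.
  for (int ind : all_operands)
    if (used.find(ind) == used.end())
      std::cout << "Remove unused operand " << ind << std::endl; // prints 4 and 5
}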
}
}
-void DotBuilder::addOpSequence(const DotSubgraphInfo &subgraph_info)
-{
- _dot << "subgraph cluster_" << subgraph_info.index().value() << " {\n";
- _dot << " label=\"" << subgraph_info.label() << "\";\n";
- _dot << " style=filled;\n";
- _dot << " color=lightgrey;\n";
- _dot << " ";
- for (auto op : subgraph_info.operations())
- {
- _dot << "operation" << op.value() << "; ";
- }
- for (auto op : subgraph_info.operands())
- {
- _dot << "operand" << op.value() << "; ";
- }
- _dot << "\n";
- _dot << "}\n";
-}
-
void DotBuilder::writeDot(std::ostream &os)
{
os << "digraph D {\n"
#include "OperationNode.h"
#include "OperandNode.h"
-#include "DotSubgraphInfo.h"
using Operation = onert::ir::Operation;
using Object = onert::ir::Operand;
public:
void update(const Node &dotinfo);
- void addOpSequence(const DotSubgraphInfo &subgraph_info);
void writeDot(std::ostream &os);
#include "DotDumper.h"
#include "DotBuilder.h"
-#include "DotSubgraphInfo.h"
-#include "ir/OpSequence.h"
#include "ir/OperationIndexMap.h"
#include "backend/Backend.h"
#include "backend/IConfig.h"
else
{
showing_cond =
- !object.isConstant() || (_graph.getInputs() + _graph.getOutputs()).contains(index);
+ !object.isConstant() || (_graph.getInputs() + _graph.getOutputs()).contains(index);
}
if (showing_cond)
{
std::string fillcolor = "";
if (_lowered_graph)
{
- auto lower_info = _lowered_graph->getLowerInfo(index);
+ auto lower_info = _lowered_graph->lower_info().operand.getRawPtr(index);
const auto &def_factors = lower_info->def_factors();
if (def_factors.size() > 0)
{
if (_lowered_graph)
{
- const auto &op_seqs = _lowered_graph->op_seqs();
- op_seqs.iterate([&](const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph->getLowerInfo(index);
- auto fillcolor = backend_to_fillcolor(lower_info->backend());
- std::string label =
- std::to_string(index.value()) + " [" + lower_info->backend()->config()->id() + "]";
- DotSubgraphInfo subgraph_info{index, op_seq, shown_operand_set, _graph.operations()};
- subgraph_info.label(label);
- subgraph_info.fillcolor(fillcolor);
- dot_builder.addOpSequence(subgraph_info);
-
- // Set fillcolor of all operations in the op_seq
- for (const auto &op_idx : op_seq.operations())
+ _graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
+ const auto lower_info = _lowered_graph->lower_info().operation.getRawPtr(index);
+ if (lower_info)
{
- auto found = operation_nodes.find(op_idx);
- if (found != operation_nodes.end())
+ auto fillcolor = backend_to_fillcolor(lower_info->backend());
+ std::string backend_label = "[" + lower_info->backend()->config()->id() + "]";
+ auto itr = operation_nodes.find(index);
+ if (itr != operation_nodes.end())
{
- auto &&op = found->second;
- op->setAttribute("fillcolor", fillcolor);
+ auto &node = itr->second;
+ node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label);
+ node->setAttribute("fillcolor", fillcolor);
}
}
});
public:
DotDumper(const ir::Graph &graph, Level level)
- : _lowered_graph{nullptr}, _graph(graph), _level{level}
+ : _lowered_graph{nullptr}, _graph(graph), _level{level}
{
}
DotDumper(const compiler::LoweredGraph *lowered_graph, Level level)
- : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level}
+ : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level}
{
}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DotSubgraphInfo.h"
-
-#include <sstream>
-
-namespace onert
-{
-namespace dumper
-{
-namespace dot
-{
-
-DotSubgraphInfo::DotSubgraphInfo(const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq,
- const util::Set<ir::OperandIndex> &shown_operands,
- const ir::Operations &operations_ctx)
- : _index{index}
-{
- for (const auto &op_idx : op_seq.operations())
- {
- _operations.insert(op_idx);
- const auto &node = operations_ctx.at(op_idx);
- for (auto o : node.getInputs())
- {
- // Must be a shown operand, not op_seq's inputs
- if (shown_operands.contains(o) && !op_seq.getInputs().contains(o))
- {
- _operands.insert(o);
- }
- }
- for (auto o : node.getOutputs())
- {
- // Must be a shown operand, not op_seq's inputs
- if (shown_operands.contains(o) && !op_seq.getOutputs().contains(o))
- {
- _operands.insert(o);
- }
- }
- }
-}
-
-} // namespace dot
-} // namespace dumper
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
-#define __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
-
-#include <unordered_set>
-
-#include "ir/Index.h"
-#include <ir/Operations.h>
-#include "ir/OpSequence.h"
-#include "util/Set.h"
-
-namespace onert
-{
-namespace dumper
-{
-namespace dot
-{
-
-class DotSubgraphInfo
-{
-public:
- DotSubgraphInfo(const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq,
- const util::Set<ir::OperandIndex> &shown_operands,
- const ir::Operations &operations_ctx);
-
- ir::OpSequenceIndex index() const { return _index; }
- std::string label() const { return _label; }
- void label(const std::string &val) { _label = val; }
- std::string fillcolor() const { return _fillcolor; }
- void fillcolor(const std::string &val) { _fillcolor = val; }
- const std::unordered_set<ir::OperationIndex> &operations() const { return _operations; }
- const std::unordered_set<ir::OperandIndex> &operands() const { return _operands; }
-
-private:
- ir::OpSequenceIndex _index;
- std::string _label;
- std::string _fillcolor;
- std::unordered_set<ir::OperationIndex> _operations;
- std::unordered_set<ir::OperandIndex> _operands;
-};
-
-} // namespace dot
-} // namespace dumper
-} // namespace onert
-
-#endif // __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
#include "OperandNode.h"
#include "ir/Graph.h"
-#include "ir/operand/LowerInfo.h"
namespace onert
{
const std::string Operand::BG_COLOR_SCHEME = "set18";
Operand::Operand(const ir::OperandIndex &index, Type type)
- : Node{"operand" + std::to_string(index.value())}
+ : Node{"operand" + std::to_string(index.value())}
{
{
auto type_to_shape = [](Type type) {
*
* @param[in] index Operand index
* @param[in] type Operand type
- * @param[in] lower_info Operand LowerInfo
*/
Operand(const ir::OperandIndex &index, Type type);
#include "OperationNode.h"
#include "ir/Graph.h"
-#include "ir/operation/LowerInfo.h"
#include "backend/IConfig.h"
#include "backend/Backend.h"
const std::string Operation::BG_COLOR_SCHEME = "pastel18";
Operation::Operation(const ir::OperationIndex &index, const ir::Operation &node)
- : Node{"operation" + std::to_string(index.value())}
+ : Node{"operation" + std::to_string(index.value())}
{
setAttribute("label", std::to_string(index.value()) + " : " + node.name());
setAttribute("shape", OPERATION_SHAPE);
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GraphDumper.h"
+
+#include "ir/Graph.h"
+#include "compiler/LoweredGraph.h"
+#include "util/logging.h"
+#include "misc/string_helpers.h"
+
+namespace onert
+{
+namespace dumper
+{
+namespace text
+{
+
+namespace
+{
+
+std::string formatOperandIndexSequence(const ir::OperandIndexSequence &seq)
+{
+ std::vector<std::string> strs;
+ for (auto ind : seq)
+ strs.push_back(dumper::text::formatOperandBrief(ind));
+ return nnfw::misc::join(strs.begin(), strs.end(), ", ");
+}
+
+} // namespace
+
+std::string formatOperandBrief(ir::OperandIndex ind)
+{
+ std::stringstream ss;
+ ss << ind;
+ return ss.str();
+}
+
+std::string formatOperand(const ir::Graph &, ir::OperandIndex ind)
+{
+ std::stringstream ss;
+ ss << ind;
+ // TODO Print shape, type and maybe more
+ return ss.str();
+}
+
+std::string formatOperation(const ir::Graph &graph, ir::OperationIndex ind)
+{
+ std::stringstream ss;
+ const auto &op = graph.operations().at(ind);
+
+ ss << formatOperandIndexSequence(op.getOutputs());
+ ss << " = ";
+ ss << ind << "_" << op.name() << "(";
+ ss << formatOperandIndexSequence(op.getInputs());
+ ss << ")";
+ return ss.str();
+}
+
+void dumpGraph(const ir::Graph &graph)
+{
+ VERBOSE(GraphDumper) << "{\n";
+ auto ops_topol = graph.topolSortOperations();
+ for (auto op_ind : ops_topol)
+ {
+ VERBOSE(GraphDumper) << " " << formatOperation(graph, op_ind) << "\n";
+ }
+ VERBOSE(GraphDumper) << "}\n";
+ VERBOSE(GraphDumper) << std::endl;
+}
+
+void dumpLoweredGraph(const compiler::LoweredGraph &lgraph)
+{
+ // TODO Graph dump with backend info
+ dumpGraph(lgraph.graph());
+}
+
+} // namespace text
+} // namespace dumper
+} // namespace onert
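For reference, dumpGraph() above emits one line per operation in topological order, of the form outputs = <operation index>_<operation name>(inputs), wrapped in braces. For a small two-operation graph the VERBOSE output would look roughly like the sketch below; the exact index rendering (shown here as %N for operands and @N for operations) depends on the Index stream operators and is an assumption.

{
  %2 = @0_Conv2D(%0, %1)
  %3 = @1_Relu(%2)
}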
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__
+#define __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__
+
+#include <ir/Index.h>
+
+namespace onert
+{
+namespace ir
+{
+class Graph;
+}
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+class LoweredGraph;
+}
+} // namespace onert
+
+namespace onert
+{
+namespace dumper
+{
+namespace text
+{
+
+std::string formatOperandBrief(ir::OperandIndex ind);
+std::string formatOperand(const ir::Graph &, ir::OperandIndex ind);
+std::string formatOperation(const ir::Graph &graph, ir::OperationIndex ind);
+void dumpGraph(const ir::Graph &graph);
+void dumpLoweredGraph(const compiler::LoweredGraph &lgraph);
+
+} // namespace text
+} // namespace dumper
+} // namespace onert
+
+#endif // __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__
{
auto &job = _waiting_jobs[id];
assert(job != nullptr);
- auto &op_seq = _lowered_graph->op_seqs().at(_job_to_op_seq[job->index()]);
- auto rank = calculateRank(op_seq.operations());
+ auto rank = calculateRank({_job_to_op[job->index()]});
_ready_jobs.emplace(rank, std::move(job));
}
}
DataflowExecutor::DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs,
compiler::CodeMap &&code_map,
const util::TracingCtx *tracing_ctx)
- : ExecutorBase{std::move(lowered_graph), tensor_regs, tracing_ctx},
- _code_map{std::move(code_map)}
+ : ExecutorBase{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, tracing_ctx},
+ _code_map{std::move(code_map)}
{
VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl;
- const auto &op_seqs = _lowered_graph->op_seqs();
- // Assign jobs convert OpSequenceIndex to job index(uint32_t)
+ // Assign jobs: convert OperationIndex to job index (uint32_t)
uint32_t next_job_index = 0;
- std::unordered_map<ir::OpSequenceIndex, uint32_t> op_seq_to_job;
- op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &) {
- VERBOSE(DataflowExecutor) << "Create a job #" << next_job_index << " with OpSequenceIndex "
- << op_seq_index.value() << std::endl;
+ std::unordered_map<ir::OperationIndex, uint32_t> op_to_job;
+ const auto &operations = _lowered_graph->graph().operations();
+ operations.iterate([&](const ir::OperationIndex &op_ind, const ir::Operation &) {
+ VERBOSE(DataflowExecutor) << "Create a job " << next_job_index << " with Operation " << op_ind
+ << std::endl;
_finished_jobs.emplace_back(
- std::make_unique<Job>(next_job_index, _code_map.at(op_seq_index).fn_seq.get()));
- op_seq_to_job[op_seq_index] = next_job_index++;
+ std::make_unique<Job>(next_job_index, _code_map.at(op_ind).fn_seq.get()));
+ op_to_job[op_ind] = next_job_index++;
});
_waiting_jobs.resize(next_job_index);
_output_info.resize(next_job_index);
_initial_input_info.resize(next_job_index, 0);
- op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
- auto job_index = op_seq_to_job[op_seq_index];
- for (auto output : op_seq.getOutputs())
+ operations.iterate([&](const ir::OperationIndex &op_ind, const ir::Operation &op) {
+ auto job_index = op_to_job[op_ind];
+ for (auto output : op.getOutputs())
{
// Update output and input info
- op_seqs.iterate(
- [&](const ir::OpSequenceIndex &op_seq_cur_index, const ir::OpSequence &op_seq_cur) {
- if (op_seq_cur.getInputs().contains(output))
- {
- auto dep_index = op_seq_to_job[op_seq_cur_index];
- ++_initial_input_info[dep_index];
- _output_info[job_index].push_back(dep_index);
- }
- });
+ operations.iterate([&](const ir::OperationIndex &op_cur_ind, const ir::Operation &op_cur) {
+ if (op_cur.getInputs().contains(output))
+ {
+ auto dep_index = op_to_job[op_cur_ind];
+ ++_initial_input_info[dep_index];
+ _output_info[job_index].push_back(dep_index);
+ }
+ });
}
});
- for (const auto &s : op_seq_to_job)
- _job_to_op_seq.emplace(s.second, s.first);
+ for (const auto &s : op_to_job)
+ _job_to_op.emplace(s.second, s.first);
_input_info = _initial_input_info;
}
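The constructor above precomputes, per job, how many producer jobs must finish before it becomes ready (_input_info) and which dependent jobs to notify on completion (_output_info). The standalone sketch below shows how such counters drive a dataflow-style execution loop; it uses plain STL types and hypothetical data, not the executor's real members.

#include <iostream>
#include <queue>
#include <vector>

int main()
{
  // Producer -> consumers edges, mirroring the role of _output_info.
  std::vector<std::vector<int>> output_info = {{1, 2}, {2}, {}};
  // Remaining unfinished producers per job, mirroring the role of _input_info.
  std::vector<int> input_info = {0, 1, 2};

  std::queue<int> ready;
  for (int job = 0; job < static_cast<int>(input_info.size()); ++job)
    if (input_info[job] == 0)
      ready.push(job);

  while (!ready.empty())
  {
    int job = ready.front();
    ready.pop();
    std::cout << "Run job " << job << std::endl; // runs 0, then 1, then 2

    // Notify consumers; a consumer becomes ready once all its producers have finished.
    for (int dep : output_info[job])
      if (--input_info[dep] == 0)
        ready.push(dep);
  }
}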
auto job = std::move((_ready_jobs.begin())->second);
_ready_jobs.erase(_ready_jobs.begin());
auto job_index = job->index();
- VERBOSE(DataflowExecutor) << "Run job #" << job_index << std::endl;
+ VERBOSE(DataflowExecutor) << "Run job " << job_index << std::endl;
- auto op_seq_index = _job_to_op_seq[job_index];
- auto op_seq = &_lowered_graph->op_seqs().at(op_seq_index);
- const backend::Backend *backend =
- _lowered_graph->getLowerInfo()->op_seq.at(op_seq_index)->backend();
+ auto op_ind = _job_to_op[job_index];
+ const backend::Backend *backend = _lowered_graph->lower_info().operation.at(op_ind).backend();
- _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend);
+ _subject.notifyJobBegin(this, profiling_subg_index, op_ind, backend);
job->fn_seq()->initRunning();
// check if FunctionSequence needs to handle dynamic tensor
- bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || dynamic_input_exists;
+ bool handle_dynamic_tensor =
+ _lowered_graph->getHasDynamicTensor(op_ind) || dynamic_input_exists;
job->fn_seq()->enableDynamicShapeInferer(handle_dynamic_tensor);
job->run();
- _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend);
+ _subject.notifyJobEnd(this, profiling_subg_index, op_ind, backend);
notify(job_index);
_finished_jobs[job_index] = std::move(job);
}
*
* @param lowered_graph LoweredGraph object
* @param tensor_builders Tensor builders that are currently used
- * @param code_map OpSequence and its code map
+ * @param code_map Map from @c ir::OperationIndex to its code
*/
DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
const util::TracingCtx *tracing_ctx);
std::multimap<int64_t, std::unique_ptr<Job>, std::greater<int64_t>> _ready_jobs;
/// @brief Which job runs which op and function.
- std::unordered_map<uint32_t, ir::OpSequenceIndex> _job_to_op_seq;
+ std::unordered_map<uint32_t, ir::OperationIndex> _job_to_op;
};
} // namespace exec
So, only when all inputs are static, we can skip dynamic shape inference.
*/
- if ((!lhs->is_dynamic()) && (!rhs->is_dynamic()))
- return;
-
auto output_idx = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_idx);
+ if ((currently_static(lhs) && currently_static(rhs)) && previously_static(output))
+ return;
+
ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
output->applyShape(new_shape);
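// Illustration only (not onert code): the new early-return above skips shape
// inference only when both inputs are currently static AND the output was
// static on the previous run; an output left dynamic by an earlier run still
// needs its shape re-inferred. A tiny standalone sketch of that condition:
#include <cassert>

bool can_skip_shape_inference(bool lhs_static_now, bool rhs_static_now, bool output_was_static)
{
  return lhs_static_now && rhs_static_now && output_was_static;
}

int main()
{
  assert(can_skip_shape_inference(true, true, true));
  // Inputs are static again, but the output shape is stale from a dynamic run: must infer.
  assert(!can_skip_shape_inference(true, true, false));
  return 0;
}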
const auto &input = _tensor_registry->getITensor(input_idx);
const auto cluster_idx{
- op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
const auto &cluster = _tensor_registry->getITensor(cluster_idx);
assert(cluster->is_constant());
assert(cluster_buf);
ir::Shape new_shape =
- shape_inference::inferBCQFullyConnectedShape(input_shape, cluster_shape, cluster_buf);
+ shape_inference::inferBCQFullyConnectedShape(input_shape, cluster_shape, cluster_buf);
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
assert(shape); // It shouldn't be 0.
auto output_shape = shape_inference::inferBroadcastToShape(
- shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer()));
+ shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer()));
// set output shape and output buffer
output->applyShape(output_shape);
{
auto isConcatible = [](const backend::ITensor *input1, const backend::ITensor *input2,
int32_t axis) {
- if (input1->num_dimensions() != input2->num_dimensions())
+ auto shape1 = input1->getShape();
+ auto shape2 = input2->getShape();
+ if (shape1.rank() != shape2.rank())
return false;
- for (size_t i = 0; i < input1->num_dimensions(); i++)
+ for (int i = 0; i < shape1.rank(); i++)
{
- auto positive_axis = (axis >= 0) ? axis : axis + input1->num_dimensions();
+ auto positive_axis = (axis >= 0) ? axis : axis + input1->getShape().rank();
if (i != positive_axis)
- if (input1->dimension(i) != input2->dimension(i))
+ if (shape1.dim(i) != shape2.dim(i))
return false;
}
assert(axis->buffer());
int32_t axis_value =
- (axis_type == ir::DataType::INT32)
- ? reinterpret_cast<const int32_t *>(axis->buffer())[0]
- : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis->buffer())[0]);
+ (axis_type == ir::DataType::INT32)
+ ? reinterpret_cast<const int32_t *>(axis->buffer())[0]
+ : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis->buffer())[0]);
auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_value);
const auto &dims_shape = shape->getShape();
auto output_shape = ((dims_type == ir::DataType::INT32)
- ? shape_inference::inferFillShape<int32_t>(
- dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
- : shape_inference::inferFillShape<int64_t>(
- dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
+ ? shape_inference::inferFillShape<int32_t>(
+ dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
+ : shape_inference::inferFillShape<int64_t>(
+ dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
output->applyShape(output_shape);
assert(output->buffer() != nullptr);
auto output = _tensor_registry->getITensor(output_index);
const auto output_state_out_index{
- op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
const auto cell_state_out_index{op.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
const auto input_shape = input->getShape();
const auto input_to_output_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
const auto input_to_output_weights = _tensor_registry->getITensor(input_to_output_weights_index);
const auto input_to_output_weights_shape = input_to_output_weights->getShape();
const auto recurrent_to_output_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
const auto recurrent_to_output_weights =
- _tensor_registry->getITensor(recurrent_to_output_weights_index);
+ _tensor_registry->getITensor(recurrent_to_output_weights_index);
const auto recurrent_to_output_weights_shape = recurrent_to_output_weights->getShape();
// re-sizing outputs
const int n_batch =
- (input_shape.rank() == 3 && op.param().time_major) ? input_shape.dim(1) : input_shape.dim(0);
+ (input_shape.rank() == 3 && op.param().time_major) ? input_shape.dim(1) : input_shape.dim(0);
const int n_cell = input_to_output_weights_shape.dim(0);
const int n_output = recurrent_to_output_weights_shape.dim(1);
if (input_shape.rank() == 3)
if (scratch_buffer != nullptr)
{
const auto input_to_input_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
const auto recurrent_to_input_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
const auto input_to_input_weights_shape =
- _tensor_registry->getITensor(input_to_input_weights_index)->getShape();
+ _tensor_registry->getITensor(input_to_input_weights_index)->getShape();
bool has_input_to_input_weights =
- input_to_input_weights_shape.dim(0) != 0 && input_to_input_weights_shape.dim(1) != 0;
+ input_to_input_weights_shape.dim(0) != 0 && input_to_input_weights_shape.dim(1) != 0;
const auto recurrent_to_input_weights_shape =
- _tensor_registry->getITensor(recurrent_to_input_weights_index)->getShape();
- bool has_recurrent_to_input_weights = recurrent_to_input_weights_shape.dim(0) != 0 &&
- recurrent_to_input_weights_shape.dim(1) != 0;
+ _tensor_registry->getITensor(recurrent_to_input_weights_index)->getShape();
+ bool has_recurrent_to_input_weights =
+ recurrent_to_input_weights_shape.dim(0) != 0 && recurrent_to_input_weights_shape.dim(1) != 0;
// NOTE cell_to_input_weights does not exist in non-peephole LSTM, even for regular (non-CIFG) LSTM.
// true: no CIFG
assert(pad_buf);
auto output_shape =
- shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements());
+ shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements());
// change output shape and reallocate output tensor memory
output->applyShape(output_shape);
if (output->data_type() == ir::DataType::FLOAT32)
{
new_shape =
- shape_inference::inferRangeShape<float>(*reinterpret_cast<float *>(start_tensor->buffer()),
- *reinterpret_cast<float *>(limit_tensor->buffer()),
- *reinterpret_cast<float *>(delta_tensor->buffer()));
+ shape_inference::inferRangeShape<float>(*reinterpret_cast<float *>(start_tensor->buffer()),
+ *reinterpret_cast<float *>(limit_tensor->buffer()),
+ *reinterpret_cast<float *>(delta_tensor->buffer()));
}
else if (output->data_type() == ir::DataType::INT32)
{
new_shape = shape_inference::inferRangeShape<int32_t>(
- *reinterpret_cast<int32_t *>(start_tensor->buffer()),
- *reinterpret_cast<int32_t *>(limit_tensor->buffer()),
- *reinterpret_cast<int32_t *>(delta_tensor->buffer()));
+ *reinterpret_cast<int32_t *>(start_tensor->buffer()),
+ *reinterpret_cast<int32_t *>(limit_tensor->buffer()),
+ *reinterpret_cast<int32_t *>(delta_tensor->buffer()));
}
output->applyShape(new_shape);
assert(output->buffer() != nullptr);
assert(new_shape_buf);
auto output_shape = shape_inference::inferReshapeShape(
- new_shape_buf, new_shape->getShape().num_elements(), input->getShape().num_elements());
+ new_shape_buf, new_shape->getShape().num_elements(), input->getShape().num_elements());
// if shape is changed, change output shape and reallocate output tensor memory
if (output_shape != output->getShape() || output->buffer() == nullptr)
width_out = op.param().width_out;
}
auto output_shape =
- shape_inference::inferResizeBilinearShape(input->getShape(), height_out, width_out);
+ shape_inference::inferResizeBilinearShape(input->getShape(), height_out, width_out);
// if shape is changed, change output shape and reallocate output tensor memory
if (output_shape != output->getShape() || output->buffer() == nullptr)
// Select output shape
ir::Shape new_shape =
- shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);
+ shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
auto padding_data = reinterpret_cast<int32_t *>(padding->buffer());
ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
- input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
+ input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
output->applyShape(new_shape);
assert(output->buffer() != nullptr);
const auto rank = input_shape.rank();
auto op_params = shape_inference::buildStridedSliceParams(
- reinterpret_cast<uint32_t *>(starts->buffer()), reinterpret_cast<uint32_t *>(ends->buffer()),
- reinterpret_cast<uint32_t *>(strides->buffer()), begin_mask, end_mask, shrink_axis_mask,
- rank);
+ reinterpret_cast<uint32_t *>(starts->buffer()), reinterpret_cast<uint32_t *>(ends->buffer()),
+ reinterpret_cast<uint32_t *>(strides->buffer()), begin_mask, end_mask, shrink_axis_mask, rank);
auto output_index = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_index);
ir::Shape output_shape =
- onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank);
+ onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank);
output->applyShape(output_shape);
assert(output->buffer() != nullptr);
auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier->buffer());
assert(multiplier_buffer);
- auto output_shape =
- shape_inference::inferTileShape(input_shape, multiplier_buffer, multiplier->dimension(0));
+ auto mult_shape = multiplier->getShape();
+ auto output_shape = shape_inference::inferTileShape(
+ input_shape, multiplier_buffer, mult_shape.rank() == 0 ? 1 : mult_shape.dim(0));
// set output shape and output buffer
output->applyShape(output_shape);
ir::Shape new_shape;
// TODO Change perm->getShape().dim(0) == 0 to perm->num_elements() == 0
- if (perm->dimension(0) == 0) // This condition means that perm is (n-1...0)
+ if (perm->getShape().dim(0) == 0) // This condition means that perm is (n-1...0)
{
// Call by (n-1...0)
new_shape = shape_inference::inferTransposeShape(input_shape, nullptr, 0);
else
{
// Check rank
- if (input->num_dimensions() != perm->getShape().num_elements())
+ if (static_cast<size_t>(input->getShape().rank()) != perm->getShape().num_elements())
{
throw std::runtime_error("DynamicShapeInferer failed, bad rank size: " +
std::to_string(perm->getShape().num_elements()));
// set output shape, based on input and params
const auto perm_buffer = reinterpret_cast<const int32_t *>(perm->buffer());
- new_shape = shape_inference::inferTransposeShape(input_shape, perm_buffer, perm->dimension(0));
+ new_shape =
+ shape_inference::inferTransposeShape(input_shape, perm_buffer, perm->getShape().dim(0));
}
output->applyShape(new_shape);
assert(output->buffer() != nullptr);
{
public:
explicit ExecTime(const std::vector<const backend::Backend *> &backends)
- : _json(backends, _measurements)
+ : _json(backends, _measurements)
{
}
_io_desc.dynamic_input_shapes[index] = new_shape;
VERBOSE(Execution) << "Model input shape will be changed at the start of execute()"
- << "(index: " << index.value() << ")" << std::endl;
+ << "(index: " << index << ")" << std::endl;
}
// TODO Remove default parameter
// note: input_shape_sig contains shape passed by nnfw_set_input_tensorinfo()
{
auto input_shape_sig = _io_desc.dynamic_input_shapes.find(index);
- auto size_required = (input_shape_sig != _io_desc.dynamic_input_shapes.end())
- ? input_shape_sig->second.num_elements() *
- onert::ir::sizeOfDataType(info.typeInfo().type())
- : info.total_size();
+ auto size_required =
+ (input_shape_sig != _io_desc.dynamic_input_shapes.end())
+ ? input_shape_sig->second.num_elements() * onert::ir::sizeOfDataType(info.typeInfo().type())
+ : info.total_size();
if (length < size_required)
{
{
const auto &input_desc = _io_desc.inputs.at(index.value());
_io_desc.inputs.at(index.value()) =
- std::make_unique<InputDesc>(input_desc->info, input_desc->buffer, input_desc->size, layout);
+ std::make_unique<InputDesc>(input_desc->info, input_desc->buffer, input_desc->size, layout);
}
void Execution::setOutputLayout(const ir::IOIndex &index, ir::Layout layout)
{
const auto &output_desc = _io_desc.outputs.at(index.value());
- _io_desc.outputs.at(index.value()) = std::make_unique<OutputDesc>(
- output_desc->info, output_desc->buffer, output_desc->size, layout);
+ _io_desc.outputs.at(index.value()) =
+ std::make_unique<OutputDesc>(output_desc->info, output_desc->buffer, output_desc->size, layout);
}
void Execution::execute()
auto itr = _io_desc.dynamic_input_shapes.find(ind);
if (itr == _io_desc.dynamic_input_shapes.end())
{
- auto operand_idx = primary_subgraph().getInputs().at(ind.value());
+ auto operand_idx = primary_subgraph().getInputs().at(ind);
return primary_subgraph().operands().at(operand_idx).shape();
}
else
}
}
-void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex index,
- const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex subg_ind,
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
for (auto &o : _observers)
{
- o->handleJobBegin(executor, index, op_seq, backend);
+ o->handleJobBegin(executor, subg_ind, op_ind, backend);
}
}
-void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex index,
- const ir::OpSequence *op_seq, const backend::Backend *backend)
+void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex subg_ind,
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
for (auto &o : _observers)
{
- o->handleJobEnd(executor, index, op_seq, backend);
+ o->handleJobEnd(executor, subg_ind, op_ind, backend);
}
}
void add(std::unique_ptr<IExecutionObserver> observer);
void notifySubgraphBegin(ir::SubgraphIndex ind);
void notifySubgraphEnd(ir::SubgraphIndex ind);
- void notifyJobBegin(IExecutor *executor, ir::SubgraphIndex index, const ir::OpSequence *op_seq,
+ void notifyJobBegin(IExecutor *executor, ir::SubgraphIndex subg_ind, ir::OperationIndex op_ind,
const backend::Backend *backend);
- void notifyJobEnd(IExecutor *executor, ir::SubgraphIndex index, const ir::OpSequence *op_seq,
+ void notifyJobEnd(IExecutor *executor, ir::SubgraphIndex subg_ind, ir::OperationIndex op_ind,
const backend::Backend *backend);
private:
#include "util/logging.h"
#include "exec/IExecutor.h"
#include "misc/polymorphic_downcast.h"
-#include "ir/OpSequence.h"
+#include "ir/Operation.h"
#include "util/EventWriter.h"
-#include "util/Utils.h"
namespace
{
-void setUserData(const onert::ir::Graph &g, const onert::ir::OpSequence *op_seq,
+void setUserData(const onert::ir::Graph &g, const onert::ir::Operation *op,
decltype(EventCollector::Event::userData) &data)
{
- if (op_seq->size() == 0)
- return;
-
// From a tensor of shape [a, b, c], this will return a string "shape(a b c)".
// String like "[1, 2, 3]" looks better but this will be considered as a list in Json
// so text search (e.g., Ctrl-F in Chrome Tracing) could be difficult
return shape_str;
};
- const auto &first_op_idx = op_seq->operations().at(0);
- const auto &first_op_node = g.operations().at(first_op_idx);
-
- auto &inputs = first_op_node.getInputs();
+ auto &inputs = op->getInputs();
auto size = inputs.size();
for (size_t i = 0; i < size; i++)
{
{
void ProfileObserver::handleJobBegin(onert::exec::IExecutor *, ir::SubgraphIndex,
- const ir::OpSequence *, const onert::backend::Backend *backend)
+ ir::OperationIndex, const onert::backend::Backend *backend)
{
_timer = backend->config()->timer();
if (_timer == nullptr)
_timer->handleBegin();
}
-void ProfileObserver::handleJobEnd(IExecutor *exec, ir::SubgraphIndex, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void ProfileObserver::handleJobEnd(IExecutor *exec, ir::SubgraphIndex,
+ const ir::OperationIndex op_ind, const backend::Backend *backend)
{
_timer->handleEnd();
const auto timer_res = _timer->getTime();
- // NOTE This assumes there is just one operation in a op_seq
- const auto &node = _graph.operations().at(op_seq->operations().at(0));
+ // NOTE A job now maps to exactly one operation, so op_ind identifies it directly
+ const auto &node = _graph.operations().at(op_ind);
auto node_name = node.name();
VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << timer_res << std::endl;
TracingObserver::TracingObserver(const std::string &filepath, const ir::Graph &graph,
const util::TracingCtx *tracing_ctx)
- : _recorder{std::make_unique<EventRecorder>()}, _collector{_recorder.get()}, _graph{graph},
- _tracing_ctx{tracing_ctx}
+ : _recorder{std::make_unique<EventRecorder>()}, _collector{_recorder.get()}, _graph{graph},
+ _tracing_ctx{tracing_ctx}
{
- // TODO Remove below after using _tracing_ctx
- UNUSED_RELEASE(_tracing_ctx);
-
_event_writer = EventWriter::get(filepath);
_event_writer->startToUse();
}
void TracingObserver::handleSubgraphBegin(ir::SubgraphIndex subg_ind)
{
- // TODO Write subg_ind into profling result
- UNUSED_RELEASE(subg_ind);
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"});
+ _collector.onEvent(
+ EventCollector::SubgEvent{_tracing_ctx, EventCollector::Edge::BEGIN, subg_ind.value()});
}
void TracingObserver::handleJobBegin(IExecutor *, ir::SubgraphIndex subg_ind,
- const ir::OpSequence *op_seq, const backend::Backend *backend)
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
- // TODO Write subg_ind into profling result
- UNUSED_RELEASE(subg_ind);
-
std::string backend_id = backend->config()->id();
-
- auto ev = EventCollector::Event{EventCollector::Edge::BEGIN, backend_id,
- opSequenceTag(op_seq, _graph.operations())};
+ const auto &op = _graph.operations().at(op_ind);
+ auto ev = EventCollector::OpSeqEvent{_tracing_ctx, EventCollector::Edge::BEGIN,
+ subg_ind.value(), backend_id,
+ op_ind.value(), op.name()};
// add shape of inputs
- setUserData(_graph, op_seq, ev.userData);
-
+ setUserData(_graph, &op, ev.userData);
_collector.onEvent(ev);
}
void TracingObserver::handleJobEnd(IExecutor *, ir::SubgraphIndex subg_ind,
- const ir::OpSequence *op_seq, const backend::Backend *backend)
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
- // TODO Write subg_ind into profling result
- UNUSED_RELEASE(subg_ind);
-
std::string backend_id = backend->config()->id();
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, backend_id,
- opSequenceTag(op_seq, _graph.operations())});
+ _collector.onEvent(EventCollector::OpSeqEvent{_tracing_ctx, EventCollector::Edge::END,
+ subg_ind.value(), backend_id, op_ind.value(),
+ _graph.operations().at(op_ind).name()});
}
void TracingObserver::handleSubgraphEnd(ir::SubgraphIndex subg_ind)
{
- // TODO Write subg_ind into profling result
- UNUSED_RELEASE(subg_ind);
-
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"});
-}
-
-std::string TracingObserver::opSequenceTag(const ir::OpSequence *op_seq,
- const ir::Operations &operations)
-{
- if (op_seq->size() == 0)
- return "Empty OpSequence";
-
- const auto &first_op_idx = op_seq->operations().at(0);
- const auto &first_op_node = operations.at(first_op_idx);
- std::string tag = "$" + std::to_string(first_op_idx.value());
- tag += " " + first_op_node.name();
- if (op_seq->size() > 1)
- {
- tag += " (+" + std::to_string(op_seq->size() - 1) + ")";
- }
- return tag;
+ _collector.onEvent(
+ EventCollector::SubgEvent{_tracing_ctx, EventCollector::Edge::END, subg_ind.value()});
}
} // namespace exec
#include "exec/IFunction.h"
#include "ir/Index.h"
-#include "ir/OpSequence.h"
+#include "ir/Operation.h"
#include "ExecTime.h"
#include "util/ITimer.h"
#include "exec/IExecutor.h"
/// @brief Invoked just before model (not individual operation) execution begins
virtual void handleSubgraphBegin(ir::SubgraphIndex) { return; }
- virtual void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ virtual void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
const backend::Backend *) = 0;
- virtual void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ virtual void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
const backend::Backend *) = 0;
/// @brief Invoked just after model (not individual operation) execution ends
{
public:
explicit ProfileObserver(std::shared_ptr<ExecTime> et, const ir::Graph &graph)
- : _et(std::move(et)), _graph(graph)
+ : _et(std::move(et)), _graph(graph)
{
}
- void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
const backend::Backend *) override;
- void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
const backend::Backend *) override;
void handleSubgraphEnd(ir::SubgraphIndex) override { _et->storeOperationsExecTime(); }
const util::TracingCtx *tracing_ctx);
~TracingObserver();
void handleSubgraphBegin(ir::SubgraphIndex) override;
- void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
const backend::Backend *) override;
- void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
const backend::Backend *) override;
void handleSubgraphEnd(ir::SubgraphIndex) override;
-private:
- static std::string opSequenceTag(const ir::OpSequence *op_seq, const ir::Operations &operations);
-
private:
std::unique_ptr<EventRecorder> _recorder;
EventCollector _collector;
#include "ExecutorBase.h"
#include "ShapeConverter.h"
-#include "backend/controlflow/UserTensor.h"
+#include "backend/builtin/UserTensor.h"
#include "util/logging.h"
#include "misc/polymorphic_downcast.h"
{
ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs,
const util::TracingCtx *tracing_ctx)
- : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, _mutex(),
- _tracing_ctx(tracing_ctx)
+ : _lowered_graph{std::move(lowered_graph)},
+ _backend_contexts{std::move(backend_contexts)}, _graph{_lowered_graph->graph()}, _mutex(),
+ _tracing_ctx(tracing_ctx)
{
auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) {
assert(tensors.empty());
{
backend::ITensor *tensor = tensor_regs.getITensor(ind);
assert(tensor != nullptr);
- auto io_tensor = nnfw::misc::polymorphic_downcast<backend::controlflow::IOTensor *>(tensor);
+ auto io_tensor = nnfw::misc::polymorphic_downcast<backend::builtin::IOTensor *>(tensor);
tensors.push_back(io_tensor);
}
};
{
const auto orig_input_shape = input_tensor->orig_info().shape();
const auto changed_input_shape =
- convertShape(input->getShape(), input->layout(), input_tensor->orig_layout());
+ convertShape(input->getShape(), input->layout(), input_tensor->orig_layout());
+ if (input_tensor->get_info().shape() != changed_input_shape)
+ {
+ // TODO Fix this workaround, introduced because cpu-based kernels use `_info` directly
+ // rather than interface methods, in order to avoid virtual function calls.
+ input_tensor->setShapeOfIPortableTensor(changed_input_shape);
+ }
if (orig_input_shape != changed_input_shape)
{
input_tensor->set_dynamic();
// set shape of outputDesc to tensor shape since tensor can be dynamic
const auto output_tensor_shape = _output_tensors[n]->getShape();
output.info.shape(
- convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout));
+ convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout));
}
}
#include "exec/IODescription.h"
#include "ir/Graph.h"
#include "ir/Index.h"
-#include "ir/LowerInfoMap.h"
+#include "compiler/GraphLowerInfo.h"
#include "ir/OperationIndexMap.h"
#include "compiler/LoweredGraph.h"
#include "compiler/TensorRegistries.h"
-#include "backend/controlflow/IOTensor.h"
+#include "backend/builtin/IOTensor.h"
#include "util/TracingCtx.h"
#include <cstdint>
* @param tensor_builders Tensor builders that are currently used
*/
ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs, const util::TracingCtx *tracing_ctx);
virtual ~ExecutorBase() = default;
void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); };
- const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const override
+ const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const override
{
return _output_tensors;
}
ExecutionObservee _subject;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
std::unique_ptr<compiler::LoweredGraph> _lowered_graph;
+ backend::BackendContexts _backend_contexts;
const ir::Graph &_graph;
- std::vector<backend::controlflow::IOTensor *> _input_tensors;
- std::vector<backend::controlflow::IOTensor *> _output_tensors;
+ std::vector<backend::builtin::IOTensor *> _input_tensors;
+ std::vector<backend::builtin::IOTensor *> _output_tensors;
std::mutex _mutex;
const util::TracingCtx *_tracing_ctx;
#include "exec/FunctionSequence.h"
#include "ir/Operation.h"
-#include "backend/IDynamicTensorManager.h"
#include "backend/ITensorRegistry.h"
#include "util/logging.h"
// acl_cl and acl_neon backend don't support dynamic shape.
// _dynamic_tensor_ctx is always nullptr for acl_cl and acl_neon
// Thus, those two backends cannot reach here.
- if (_dynamic_tensor_ctx->op_seq->size() != _functions.size())
- throw std::runtime_error("operation and functions should be mapped one by one");
- auto op_seq_iter = _dynamic_tensor_ctx->op_seq->begin();
+ // Do dynamic shape inference
+ auto op_ind = _dynamic_tensor_ctx->op_ind;
+ auto &op = _dynamic_tensor_ctx->operations->at(op_ind);
+ op.accept(*_dynamic_tensor_ctx->dynamic_shape_inferer);
+
for (const auto &function : _functions)
{
- // set shape of output and allocate memory when needed
- auto &op = _dynamic_tensor_ctx->operations->at(*op_seq_iter);
- op.accept(*_dynamic_tensor_ctx->dynamic_shape_inferer);
-
+ // NOTE The function could itself be a FunctionSequence, so handle that case here
+ // TODO Remove this or do this recursively
auto *sub_func_seq = dynamic_cast<FunctionSequence *>(function.get());
if (sub_func_seq != nullptr)
{
// run kernel
function->run();
-
- // deallocate input tensors which is no longer used
- _dynamic_tensor_ctx->dynamic_tensor_manager->deallocInput(*op_seq_iter);
-
- op_seq_iter++;
}
}
else
auto &dst_offsets = _dst_tensors_offsets.at(i);
if (src_tensor != dst_tensor)
{
- const auto rank = src_tensor->num_dimensions();
+ const auto rank = src_tensor->getShape().rank();
permute(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
}
}
case PermuteType::NHWC_TO_NCHW:
{
ir::FeatureShape shape;
- shape.N = dst->dimension(0);
- shape.C = dst->dimension(1);
- shape.H = dst->dimension(2);
- shape.W = dst->dimension(3);
+ auto dst_shape = dst->getShape();
+ shape.N = dst_shape.dim(0);
+ shape.C = dst_shape.dim(1);
+ shape.H = dst_shape.dim(2);
+ shape.W = dst_shape.dim(3);
typename feature::nchw::View<T>::Strides strides;
const auto start_offset = dst->calcOffset({0, 0, 0, 0});
- strides.W = dst->dimension(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
- strides.H = dst->dimension(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
- strides.C = dst->dimension(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
- strides.N = dst->dimension(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;
+ strides.W = dst_shape.dim(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
+ strides.H = dst_shape.dim(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
+ strides.C = dst_shape.dim(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
+ strides.N = dst_shape.dim(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;
const feature::nhwc::Reader<T> from(src);
feature::nchw::View<T> into(shape, strides,
case PermuteType::NCHW_TO_NHWC:
{
ir::FeatureShape shape;
- shape.N = dst->dimension(0);
- shape.H = dst->dimension(1);
- shape.W = dst->dimension(2);
- shape.C = dst->dimension(3);
+ auto dst_shape = dst->getShape();
+ shape.N = dst_shape.dim(0);
+ shape.H = dst_shape.dim(1);
+ shape.W = dst_shape.dim(2);
+ shape.C = dst_shape.dim(3);
typename feature::nhwc::View<T>::Strides strides;
const auto start_offset = dst->calcOffset({0, 0, 0, 0});
- strides.C = dst->dimension(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
- strides.W = dst->dimension(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
- strides.H = dst->dimension(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
- strides.N = dst->dimension(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;
+ strides.C = dst_shape.dim(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
+ strides.W = dst_shape.dim(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
+ strides.H = dst_shape.dim(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
+ strides.N = dst_shape.dim(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;
const feature::nchw::Reader<T> from(src);
feature::nhwc::View<T> into(shape, strides,
* _measurements[Backend*]["string"][bool][uint32_t] = int64_t
*/
using MeasurementData = std::unordered_map<
- const backend::Backend *,
- std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>;
+ const backend::Backend *,
+ std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>;
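// Illustration only (not onert code): how the nested MeasurementData above is
// meant to be indexed. Reading the surrounding header comment, the levels appear
// to be backend -> operation name -> quantized flag -> operation size -> time;
// the FakeBackend type and the concrete values below are made up for the example.
#include <cstdint>
#include <map>
#include <string>
#include <unordered_map>

struct FakeBackend
{
};

using MeasurementData = std::unordered_map<
  const FakeBackend *,
  std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>;

int main()
{
  FakeBackend cpu;
  MeasurementData m;
  m[&cpu]["Conv2D"][/*quantized=*/false][/*op size=*/1024] = 350; // e.g. microseconds
  return 0;
}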
class JSON
{
public:
explicit JSON(const std::vector<const backend::Backend *> &backends,
MeasurementData &measurements)
- : _measurement_file("exec_time.json"), _backends(), _measurements(measurements)
+ : _measurement_file("exec_time.json"), _backends(), _measurements(measurements)
{
for (const auto b : backends)
{
namespace exec
{
-#ifdef RUY_PROFILER
-namespace
-{
-char *seq_to_label(const onert::ir::OpSequence *op_seq, const onert::ir::Operations &operations)
-{
- auto node_name = operations.at(*op_seq->begin()).name();
- char *cstr = new char[node_name.length() + 1];
- std::strcpy(cstr, node_name.c_str());
- return cstr;
-}
-} // namespace
-#endif
-
void LinearExecutor::executeImpl()
{
auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
_subject.notifySubgraphBegin(profiling_subg_index);
for (auto &&code : _code)
{
- const auto op_seq = code.op_seq;
const auto backend = code.lower_info->backend();
// TODO : Move ruy profiler into ExecutionObserver
#ifdef RUY_PROFILER
- ruy::profiler::ScopeLabel label(seq_to_label(op_seq, _graph.operations()));
+ ruy::profiler::ScopeLabel label(code.op->name());
#endif
- _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend);
+ _subject.notifyJobBegin(this, profiling_subg_index, code.op_ind, backend);
auto &fn_seq = code.fn_seq;
fn_seq->initRunning();
- bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || hasDynamicInput();
+ bool handle_dynamic_tensor =
+ _lowered_graph->getHasDynamicTensor(code.op_ind) || hasDynamicInput();
fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor);
fn_seq->run();
- _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend);
+ _subject.notifyJobEnd(this, profiling_subg_index, code.op_ind, backend);
}
_subject.notifySubgraphEnd(profiling_subg_index);
}
* @brief Construct a new LinearExecutor object
* @param lowered_graph LoweredGraph object
* @param tensor_builders Tensor builders that are currently used
- * @param code_map OpSequence and its code map
+ * @param code_map Map from @c ir::OperationIndex to its code
*/
LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
- const std::vector<ir::OpSequenceIndex> &order, const util::TracingCtx *tracing_ctx)
- : ExecutorBase{std::move(lowered_graph), tensor_regs, tracing_ctx}
+ const std::vector<ir::OperationIndex> &order, const util::TracingCtx *tracing_ctx)
+ : ExecutorBase{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, tracing_ctx}
{
for (auto index : order)
{
public:
HookFunction(IFunction *fn, const std::function<void()> &setup,
const std::function<void()> &teardown)
- : _fn{fn}, _setup{setup}, _teardown{teardown}
+ : _fn{fn}, _setup{setup}, _teardown{teardown}
{
}
}
ParallelExecutor::ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs,
compiler::CodeMap &&code_map,
const util::TracingCtx *tracing_ctx)
- : DataflowExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map), tracing_ctx}
+ : DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs,
+ std::move(code_map), tracing_ctx}
{
VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
}
bool dynamic_input_exists = hasDynamicInput();
// Init scheduler
- // TODO Consider to have distinct backend set in LowerInfoMap
+ // TODO Consider to have distinct backend set in GraphLowerInfo
BackendSet backends;
- for (auto &itr : _lowered_graph->getLowerInfo()->op_seq)
- {
- backends.add(itr.second->backend());
- }
+ _lowered_graph->lower_info().operation.iterate(
+ [&](const ir::OperationIndex &, const compiler::OperationLowerInfo &lower_info) {
+ backends.add(lower_info.backend());
+ });
_scheduler = std::make_unique<ParallelScheduler>(backends);
assert(noWaitingJobs());
lock.unlock();
- VERBOSE(ParallelExecutor) << "Assigning fn #" << job->index() << std::endl;
+ VERBOSE(ParallelExecutor) << "Assigning fn " << job->index() << std::endl;
auto job_index = job->index();
- auto op_sequence_index = _job_to_op_seq[job_index];
- auto op_seq = &_lowered_graph->op_seqs().at(op_sequence_index);
- auto backend = _lowered_graph->getLowerInfo()->op_seq.at(op_sequence_index)->backend();
- auto setup = [&, op_seq, backend]() {
- _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend);
+ auto op_ind = _job_to_op[job_index];
+ auto backend = _lowered_graph->lower_info().operation.at(op_ind).backend();
+ auto setup = [&, op_ind, backend]() {
+ _subject.notifyJobBegin(this, profiling_subg_index, op_ind, backend);
};
- auto teardown = [&, job_index, op_seq, backend]() {
- _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend);
+ auto teardown = [&, job_index, op_ind, backend]() {
+ _subject.notifyJobEnd(this, profiling_subg_index, op_ind, backend);
notify(job_index);
};
job->fn_seq()->initRunning();
// dynamic tensor setting
- bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || dynamic_input_exists;
+ bool handle_dynamic_tensor =
+ _lowered_graph->getHasDynamicTensor(op_ind) || dynamic_input_exists;
job->fn_seq()->enableDynamicShapeInferer(handle_dynamic_tensor);
_scheduler->assign(std::make_unique<HookFunction>(job->fn_seq(), setup, teardown), backend);
*
* @param lowered_graph LoweredGraph object
* @param tensor_builders Tensor builders that are currently used
- * @param code_map OpSequence and its code map
+ * @param code_map Map from @c ir::OperationIndex to its code
*/
ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
const util::TracingCtx *tracing_ctx);
using Strides = ir::FeatureShape;
// Construct for buffer and strides
Reader(const ir::FeatureShape &shape, const Strides &strides, const T *ptr, size_t len)
- : _shape{shape}, _strides{strides}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
+ : _shape{shape}, _strides{strides}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
{
UNUSED_RELEASE(len); // Workaround for unused variable in release mode
assert(len == static_cast<size_t>(strides.N != 0
- ? shape.N * strides.N
- : strides.C != 0 ? shape.C * strides.C
- : strides.H != 0 ? shape.H * strides.H
- : shape.W * strides.W));
+ ? shape.N * strides.N
+ : strides.C != 0 ? shape.C * strides.C
+ : strides.H != 0 ? shape.H * strides.H
+ : shape.W * strides.W));
}
// Construct for backend tensor
Reader(backend::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
{
assert(tensor->layout() == ir::Layout::NCHW);
const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.W = tensor->dimension(3);
- _shape.H = tensor->dimension(2);
- _shape.C = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
+ auto shape = tensor->getShape();
+ _strides.W = shape.dim(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
+ _strides.H = shape.dim(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
+ _strides.C = shape.dim(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
+ _strides.N = shape.dim(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
+
+ _shape.W = shape.dim(3);
+ _shape.H = shape.dim(2);
+ _shape.C = shape.dim(1);
+ _shape.N = shape.dim(0);
}
public:
using Strides = typename Reader<T>::Strides;
// Construct for buffer of model inputs
View(const ir::FeatureShape &shape, const Strides &strides, T *ptr, size_t len)
- : Reader<T>{shape, strides, ptr, len}
+ : Reader<T>{shape, strides, ptr, len}
{
// DO NOTHING
}
using Strides = ir::FeatureShape;
// Construct for buffer and strides
Reader(const ir::FeatureShape &shape, const Strides &strides, const T *ptr, size_t len)
- : _shape{shape}, _strides{strides}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
+ : _shape{shape}, _strides{strides}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
{
UNUSED_RELEASE(len); // Workaround for unused variable in release mode
assert(len == static_cast<size_t>(strides.N != 0
- ? shape.N * strides.N
- : strides.H != 0 ? shape.H * strides.H
- : strides.W != 0 ? shape.W * strides.W
- : shape.C * strides.C));
+ ? shape.N * strides.N
+ : strides.H != 0 ? shape.H * strides.H
+ : strides.W != 0 ? shape.W * strides.W
+ : shape.C * strides.C));
}
// Construct for backend tensor
Reader(const backend::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
{
assert(tensor->layout() == ir::Layout::NHWC);
const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.C = tensor->dimension(3);
- _shape.W = tensor->dimension(2);
- _shape.H = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
+ auto shape = tensor->getShape();
+ _strides.C = shape.dim(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
+ _strides.W = shape.dim(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
+ _strides.H = shape.dim(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
+ _strides.N = shape.dim(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
+
+ _shape.C = shape.dim(3);
+ _shape.W = shape.dim(2);
+ _shape.H = shape.dim(1);
+ _shape.N = shape.dim(0);
}
public:
using Strides = typename Reader<T>::Strides;
// Construct for buffer and strides
View(const ir::FeatureShape &shape, const Strides &strides, T *ptr, size_t len)
- : Reader<T>{shape, strides, ptr, len}
+ : Reader<T>{shape, strides, ptr, len}
{
// DO NOTHING
}
auto input_tensor = std::make_shared<ROTensor>(input->info);
input_tensor->setData(std::make_shared<const ir::ExternalData>(
- reinterpret_cast<const uint8_t *>(input->buffer), input->size));
+ reinterpret_cast<const uint8_t *>(input->buffer), input->size));
tensor_map[input_index] = input_tensor;
}
{
if (tensor_map.find(index) != tensor_map.end())
{
- VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index.value() << std::endl;
+ VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index << std::endl;
interp_env->assignTensor(index, tensor_map.at(index));
}
}
<< std::endl;
interp_env->assignExternalBuffer(
- output_index, std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output->buffer),
- output->size));
+ output_index,
+ std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output->buffer), output->size));
}
// Allocate constant tensor
_graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (obj.isConstant())
{
- VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind.value()
+ VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind
<< std::endl;
assert(obj.data());
auto const_tensor = std::make_shared<ROTensor>(obj.info());
// Assume that interpreter's tensor layout is same with model (NHWC)
const_tensor->setData(
- std::make_shared<ir::ExternalData>(obj.data()->base(), obj.info().total_size()));
+ std::make_shared<ir::ExternalData>(obj.data()->base(), obj.info().total_size()));
interp_env->assignTensor(ind, const_tensor);
}
});
*/
const ir::Graph &graph() final { return _graph; }
void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) override{
- // Not implemented
+ // Not implemented
};
/**
* @brief Start execution
{
throw new std::runtime_error{"Interpreter does not support subgraph calls(control flow ops)"};
}
- const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const final
+ const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const final
{
throw new std::runtime_error{"Interpreter does not support this function."};
}
const ir::Operation &node = _env->graph().operations().at(idx);
const auto nodeName = node.name();
VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName
- << " operation (id: " << idx.value() << ")" << std::endl;
+ << " operation (id: " << idx << ")" << std::endl;
const auto nodeOpCode = node.opcode();
if (_kernels.find(nodeOpCode) == _kernels.end())
// But that scenario may not exist
for (auto ind : _env->graph().getInputs())
{
- VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind.value() << std::endl;
+ VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind << std::endl;
operand_stack.push(ind);
}
_env->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (obj.isConstant())
{
- VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind.value() << std::endl;
+ VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind << std::endl;
operand_stack.push(ind);
}
if (operator_ready)
{
- VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator.value() << std::endl;
+ VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator << std::endl;
operation_stack.push(use_operator);
}
}
{
const auto current_operation_index = operation_stack.top();
operation_stack.pop();
- VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index.value() << "("
+ VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index << "("
<< _env->graph().operations().at(current_operation_index).name() << ")"
<< std::endl;
return ir::Layout::NHWC;
}
+ir::Shape Tensor::getShape() const { return _info.shape(); }
+
+ir::Shape ROTensor::getShape() const { return _info.shape(); }
+
} // namespace interp
} // namespace onert
virtual void releaseData() = 0;
virtual size_t total_size() const = 0;
- virtual size_t dimension(size_t index) const = 0;
- virtual size_t num_dimensions() const = 0;
virtual size_t calcOffset(const ir::Coordinates &coords) const = 0;
virtual bool has_padding() const = 0;
void releaseData() override { _data = nullptr; }
size_t total_size() const override { return _info.total_size(); }
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
size_t calcOffset(const ir::Coordinates &coords) const override;
ir::Layout layout() const override;
bool is_dynamic() const override { return false; }
bool has_padding() const override { return false; }
ir::DataType data_type() const override { return _info.typeInfo().type(); }
float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
+ int32_t data_zero_point() const override { return _info.typeInfo().zero_point(); }
+ const std::vector<float> &data_scales() const override { return _info.typeInfo().scales(); }
+ const std::vector<int32_t> &data_zero_points() const override
+ {
+ return _info.typeInfo().zero_points();
+ }
const ir::OperandInfo &tensorInfo() const override { return _info; }
uint64_t num_elements() const override { return _info.shape().num_elements(); };
+ ir::Shape getShape() const override;
private:
const ir::OperandInfo _info;
void releaseData() override { _buffer = nullptr; }
size_t total_size() const override { return _info.total_size(); }
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
size_t calcOffset(const ir::Coordinates &coords) const override;
ir::Layout layout() const override;
bool is_dynamic() const override { return false; }
bool has_padding() const override { return false; }
ir::DataType data_type() const override { return _info.typeInfo().type(); }
float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
+ int32_t data_zero_point() const override { return _info.typeInfo().zero_point(); }
+ const std::vector<float> &data_scales() const override { return _info.typeInfo().scales(); }
+ const std::vector<int32_t> &data_zero_points() const override
+ {
+ return _info.typeInfo().zero_points();
+ }
const ir::OperandInfo &tensorInfo() const override { return _info; }
uint64_t num_elements() const override { return _info.shape().num_elements(); };
+ ir::Shape getShape() const override;
private:
const ir::OperandInfo _info;
void prepare(ExecEnv *env, const ir::Operation &node)
{
const auto &arithmetic_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
}
auto output_info =
- ir::OperandInfo::createStaticInfo(out_shape, lhs_tensor->tensorInfo().typeInfo());
+ ir::OperandInfo::createStaticInfo(out_shape, lhs_tensor->tensorInfo().typeInfo());
// We can handle already allocated (ex. model output)
env->allocateIfNeeded(out_index, output_info);
}
raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer);
const auto cker_op_type =
- (op_type == OpType::ADD)
- ? nnfw::cker::BinaryArithmeticOpType::ADD
- : ((op_type == OpType::SUB) ? nnfw::cker::BinaryArithmeticOpType::SUB
- : nnfw::cker::BinaryArithmeticOpType::MUL);
+ (op_type == OpType::ADD) ? nnfw::cker::BinaryArithmeticOpType::ADD
+ : ((op_type == OpType::SUB) ? nnfw::cker::BinaryArithmeticOpType::SUB
+ : nnfw::cker::BinaryArithmeticOpType::MUL);
- const bool need_broadcast = nnfw::cker::ProcessBroadcastShapes(
- convertShape(lhs_tensor->tensorInfo().shape()),
- convertShape(rhs_tensor->tensorInfo().shape()), &cker_param);
+ const bool need_broadcast =
+ nnfw::cker::ProcessBroadcastShapes(convertShape(lhs_tensor->tensorInfo().shape()),
+ convertShape(rhs_tensor->tensorInfo().shape()), &cker_param);
if (need_broadcast)
{
void invokeBinaryArithmeticOps(const ExecEnv *env, const ir::Operation &node)
{
const auto &arithmetic_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
switch (arithmetic_node.param().arithmetic_type)
{
const auto first_tensor = env->tensorAt(first_index);
uint32_t out_axis_dimension = 0;
const int32_t axis_raw = concat_node.param().axis;
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + first_tensor->num_dimensions()) : axis_raw;
+ const int32_t axis = (axis_raw < 0) ? (axis_raw + first_tensor->getShape().rank()) : axis_raw;
// All inputs shape should be same except axis dimension
// All inputs type should be same
for (auto input : node.getInputs())
{
- assert(first_tensor->num_dimensions() == env->tensorAt(input)->num_dimensions());
+ assert(first_tensor->getShape().rank() == env->tensorAt(input)->getShape().rank());
assert(first_tensor->data_type() == env->tensorAt(input)->data_type());
- for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++)
+ for (int i = 0; i < first_tensor->getShape().rank(); i++)
{
if (i == axis)
{
- out_axis_dimension += env->tensorAt(input)->dimension(i);
+ out_axis_dimension += env->tensorAt(input)->getShape().dim(i);
continue;
}
- assert(first_tensor->dimension(i) == env->tensorAt(input)->dimension(i));
+ assert(first_tensor->getShape().dim(i) == env->tensorAt(input)->getShape().dim(i));
}
}
// Make output tensor info using first input tensor info, and accumulated axis dimension value
auto out_shape = first_tensor->tensorInfo().shape();
out_shape.dim(axis) = out_axis_dimension;
- env->allocateIfNeeded(out_index, ir::OperandInfo::createStaticInfo(
- out_shape, first_tensor->tensorInfo().typeInfo()));
+ env->allocateIfNeeded(
+ out_index, ir::OperandInfo::createStaticInfo(out_shape, first_tensor->tensorInfo().typeInfo()));
auto out_tensor = env->tensorAt(out_index);
UNUSED_RELEASE(out_tensor);
- // Output shape should be same with input except axis dimension
+ // Output shape should be same as input except for the axis dimension
// Output type should be same with input
assert(first_tensor->data_type() == out_tensor->data_type());
- for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++)
+ for (int i = 0; i < first_tensor->getShape().rank(); i++)
{
if (i == axis)
{
continue;
}
- assert(first_tensor->dimension(i) == out_tensor->dimension(i));
+ assert(first_tensor->getShape().dim(i) == out_tensor->getShape().dim(i));
}
}
const auto out_index = node.getOutputs().at(0);
const auto out_tensor = env->tensorAt(out_index);
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->num_dimensions()) : axis_raw;
+ const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->getShape().rank()) : axis_raw;
const auto data_type = in_tensors[0]->data_type();
if (data_type == ir::DataType::FLOAT32)
const auto kernel_tensor = env->tensorAt(kernel_index);
const auto bias_tensor = env->tensorAt(bias_index);
- assert(in_tensor->num_dimensions() == 4);
- assert(kernel_tensor->num_dimensions() == 4);
- assert(bias_tensor->num_dimensions() == 1);
+ assert(in_tensor->getShape().rank() == 4);
+ assert(kernel_tensor->getShape().rank() == 4);
+ assert(bias_tensor->getShape().rank() == 1);
UNUSED_RELEASE(in_tensor);
UNUSED_RELEASE(kernel_tensor);
// Handle unspecified output shape
const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
const auto infered_output_shape = shape_inference::inferConv2DShape(
- in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param());
+ in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param());
env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
+ out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
}
else
{
// Handle same ifm & ofm data type only
assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
+ assert(out_tensor->getShape().rank() == 4);
}
void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
const auto &ker_shape = ker_tensor->tensorInfo().shape();
const auto ker_height = ker_shape.dim(1);
const auto ker_width = ker_shape.dim(2);
- const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride,
- ker_width, ker_height);
+ const auto padding =
+ ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, ker_width, ker_height);
// Calculate
float activation_min, activation_max;
const auto kernel_tensor = env->tensorAt(kernel_index);
const auto bias_tensor = env->tensorAt(bias_index);
- assert(in_tensor->num_dimensions() == 4);
- assert(kernel_tensor->num_dimensions() == 4);
- assert(bias_tensor->num_dimensions() == 1);
+ assert(in_tensor->getShape().rank() == 4);
+ assert(kernel_tensor->getShape().rank() == 4);
+ assert(bias_tensor->getShape().rank() == 1);
UNUSED_RELEASE(in_tensor);
UNUSED_RELEASE(kernel_tensor);
{
// Handle unspecified output shape
const auto &depth_conv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node);
const auto infered_output_shape = shape_inference::inferDepthwiseConv2DShape(
- in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(),
- depth_conv_node.param());
+ in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(),
+ depth_conv_node.param());
env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
+ out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
}
else
{
// Handle same ifm & ofm data type only
assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
+ assert(out_tensor->getShape().rank() == 4);
}
void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
const auto &ker_shape = ker_tensor->tensorInfo().shape();
const auto ker_height = ker_shape.dim(1);
const auto ker_width = ker_shape.dim(2);
- const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride,
- ker_width, ker_height);
+ const auto padding =
+ ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, ker_width, ker_height);
// Calculate
float activation_min, activation_max;
else
{
const auto &act_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
evalFloat<act_type>(input_start, out, elements, act_node.param().alpha,
act_node.param().beta);
}
void invokeElementwiseActivation(const ExecEnv *env, const ir::Operation &node)
{
const auto &act_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
switch (act_node.param().op_type)
{
case ir::operation::ElementwiseActivation::Type::LOGISTIC:
UNUSED_RELEASE(kernel_tensor);
UNUSED_RELEASE(bias_tensor);
- assert(in_tensor->num_dimensions() >= 2);
- assert(kernel_tensor->num_dimensions() == 2);
- assert(bias_tensor->num_dimensions() == 1);
+ assert(in_tensor->getShape().rank() >= 2);
+ assert(kernel_tensor->getShape().rank() == 2);
+ assert(bias_tensor->getShape().rank() == 1);
const auto input_size_with_batch = in_tensor->num_elements();
- const auto num_units = kernel_tensor->dimension(0);
- const auto input_size = kernel_tensor->dimension(1);
- const auto batch_size = input_size_with_batch / input_size;
+ const auto num_units = kernel_tensor->getShape().dim(0);
+ const auto input_size = kernel_tensor->getShape().dim(1);
+ const int32_t batch_size = input_size_with_batch / input_size;
assert(input_size_with_batch % input_size == 0);
- assert(num_units == bias_tensor->dimension(0));
+ assert(num_units == bias_tensor->getShape().dim(0));
// Make output tensor info
ir::Shape output_shape(2);
output_shape.dim(0) = batch_size;
output_shape.dim(1) = num_units;
const auto out_info =
- ir::OperandInfo::createStaticInfo(output_shape, in_tensor->tensorInfo().typeInfo());
+ ir::OperandInfo::createStaticInfo(output_shape, in_tensor->tensorInfo().typeInfo());
env->allocateIfNeeded(out_index, out_info);
auto out_tensor = env->tensorAt(out_index);
// Handle same ifm & ofm data type only
assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 2);
- assert(out_tensor->dimension(0) == batch_size);
- assert(out_tensor->dimension(1) == num_units);
+ assert(out_tensor->getShape().rank() == 2);
+ assert(out_tensor->getShape().dim(0) == batch_size);
+ assert(out_tensor->getShape().dim(1) == num_units);
}
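// Editor's illustrative arithmetic (hypothetical sizes, not part of this patch) for the
// FullyConnected output-shape derivation above: with a kernel of shape [num_units, input_size]
// and a flattened input, batch_size is inferred from the input element count.
#include <cassert>
#include <cstdint>

int main()
{
  const int32_t input_size_with_batch = 2 * 64; // e.g. a [2, 64] input tensor
  const int32_t num_units = 10;                 // kernel dim(0)
  const int32_t input_size = 64;                // kernel dim(1)
  assert(input_size_with_batch % input_size == 0);
  const int32_t batch_size = input_size_with_batch / input_size; // -> 2
  // The allocated output info then has shape [batch_size, num_units] = [2, 10]
  assert(batch_size == 2 && num_units == 10);
  return 0;
}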
void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
void invokeFC(const ExecEnv *env, const ir::Operation &node)
{
const auto &conv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node);
const auto ifm_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
const auto ker_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
}
auto output_tensor = env->tensorAt(output_index);
- auto output_rank = input_tensor->num_dimensions() + indices_tensor->num_dimensions() - 1;
+ auto output_rank = input_tensor->getShape().rank() + indices_tensor->getShape().rank() - 1;
- if (output_rank != output_tensor->num_dimensions())
+ if (output_rank != output_tensor->getShape().rank())
{
throw std::runtime_error{"Interp(Gather): Invalid output rank"};
}
input_tensor->tensorInfo().typeInfo() != output_tensor->tensorInfo().typeInfo())
{
throw std::runtime_error{
- "Interp(Gather): Cannot handle different I/O QUANT_UINT8_ASYMM scale/offset"};
+ "Interp(Gather): Cannot handle different I/O QUANT_UINT8_ASYMM scale/offset"};
}
}
const auto input_tensor = env->tensorAt(input_index);
const auto indices_tensor = env->tensorAt(indices_index);
const auto output_tensor = env->tensorAt(output_index);
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + input_tensor->num_dimensions()) : axis_raw;
+ const uint32_t axis = (axis_raw < 0) ? (axis_raw + input_tensor->getShape().rank()) : axis_raw;
const auto data_type = input_tensor->data_type();
void prepareInstanceNorm(ExecEnv *env, const ir::Operation &node)
{
const auto &instancenorm_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
const auto output_index = node.getOutputs().at(0);
const auto input_tensor = env->tensorAt(input_index);
- if (input_tensor->num_dimensions() != 4)
+ if (input_tensor->getShape().rank() != 4)
{
throw std::runtime_error{"Interp(InstanceNorm): Input should be 4D-tensor"};
}
void invokeInstanceNorm(const ExecEnv *env, const ir::Operation &node)
{
const auto &instancenorm_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
const auto gamma_index = node.getInputs().at(instancenorm_node.GAMMA);
const auto pad_buffer = pad_tensor->bufferRO();
auto output_buffer = output_tensor->buffer();
- int32_t pad_rank = pad_tensor->dimension(0);
+ int32_t pad_rank = pad_tensor->getShape().dim(0);
const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
const auto in_tensor = env->tensorAt(in_index);
UNUSED_RELEASE(in_tensor);
- assert(in_tensor->num_dimensions() == 4);
+ assert(in_tensor->getShape().rank() == 4);
const auto output_info = env->graph().operands().at(out_index).info();
if (output_info.total_size() == 0)
{
// Handle unspecified output shape
const auto infered_output_shape =
- shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param());
+ shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param());
env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
+ out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
}
else
{
// Handle same ifm & ofm data type only
assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
+ assert(out_tensor->getShape().rank() == 4);
}
template <typename T>
const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
const auto param = pool_node.param();
const auto padding =
- ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
+ ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
// Calculate
nnfw::cker::PoolParams cker_param;
cker_param.filter_width = param.kw;
const auto in_tensor = env->tensorAt(in_index);
UNUSED_RELEASE(in_tensor);
- assert((in_tensor->num_dimensions() == 4) || (in_tensor->num_dimensions() == 2));
+ assert((in_tensor->getShape().rank() == 4) || (in_tensor->getShape().rank() == 2));
// Output shape should be same with input
// Output type is pre-defined in model
UNUSED_RELEASE(out_tensor);
// Check output shape is same with input
- assert(out_tensor->num_dimensions() == out_tensor->num_dimensions());
- for (uint32_t i = 0; i < in_tensor->num_dimensions(); i++)
+ assert(in_tensor->getShape().rank() == out_tensor->getShape().rank());
+ for (int32_t i = 0; i < in_tensor->getShape().rank(); i++)
{
- assert(in_tensor->dimension(i) == out_tensor->dimension(i));
+ assert(in_tensor->getShape().dim(i) == out_tensor->getShape().dim(i));
}
}
float beta = param.beta;
- if (in_tensor->num_dimensions() == 2)
+ if (in_tensor->getShape().rank() == 2)
{
- uint32_t batch_size = in_tensor->dimension(0);
- uint32_t input_size = in_tensor->dimension(1);
+ uint32_t batch_size = in_tensor->getShape().dim(0);
+ uint32_t input_size = in_tensor->getShape().dim(1);
nnfw::cker::Softmax(in_ptr, input_size, batch_size, beta, out_ptr);
}
- else if (in_tensor->num_dimensions() == 4)
+ else if (in_tensor->getShape().rank() == 4)
{
const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
const auto ker_tensor = env->tensorAt(ker_index);
const auto ofm_shape_tensor = env->tensorAt(ofm_shape_index);
- assert(ifm_tensor->num_dimensions() == 4);
- assert(ker_tensor->num_dimensions() == 4);
- assert(ofm_shape_tensor->num_dimensions() == 1);
+ assert(ifm_tensor->getShape().rank() == 4);
+ assert(ker_tensor->getShape().rank() == 4);
+ assert(ofm_shape_tensor->getShape().rank() == 1);
UNUSED_RELEASE(ifm_tensor);
UNUSED_RELEASE(ker_tensor);
throw std::runtime_error{"Interp(TConv): Different I/O data dype"};
}
- if (ofm_tensor->num_dimensions() != 4)
+ if (ofm_tensor->getShape().rank() != 4)
{
throw std::runtime_error{"Interp(TConv): Invalid output rank"};
}
const auto ker_shape = ker_tensor->tensorInfo().shape();
const auto ker_height = ker_shape.dim(1);
const auto ker_width = ker_shape.dim(2);
- const auto padding = ir::calculatePadding(param.padding, ofm_shape, ifm_shape, param.stride,
- ker_width, ker_height);
+ const auto padding =
+ ir::calculatePadding(param.padding, ofm_shape, ifm_shape, param.stride, ker_width, ker_height);
nnfw::cker::TransposeConvParams cker_param;
cker_param.padding_values.width = padding.left;
void invokeTransposeConv(const ExecEnv *env, const ir::Operation &node)
{
const auto &tconv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node);
const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
#include "OperationValidator.h"
#include <algorithm>
+
#include <bitset>
#include <sstream>
#include "util/logging.h"
+#include "util/Set.h"
#include "verifier/Verifier.h"
-#include "ir/operation/LowerInfo.h"
-#include "ir/operand/LowerInfo.h"
-#include "ir/operand/PermuteFactor.h"
#include "ir/OperandIndexMap.h"
-#include "ir/GraphIterator.h"
+#include "ir/OperationIndexMap.h"
+#include "dumper/text/GraphDumper.h"
#include "backend/IConfig.h"
namespace onert
return _operands.emplace(shape, type);
}
-OperationIndex Graph::addOperation(std::unique_ptr<Operation> &&node)
+OperandIndex Graph::addOperand(OperandIndex index, std::unique_ptr<Operand> &&operand)
+{
+ return _operands.push(std::move(operand), index);
+}
+
+bool Graph::checkOperandsForOperation(const Operation &operation)
+{
+ auto inputs = operation.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ auto outputs = operation.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ for (auto input : inputs)
+ if (!operands().exist(input))
+ return false;
+ for (auto input : outputs)
+ if (!operands().exist(input))
+ return false;
+ return true;
+}
+
+void Graph::linkOperandToOperation(OperationIndex index, const Operation &operation)
{
- assert(isBuildingPhase());
- return _operations.push(std::move(node));
+ auto inputs = operation.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ auto outputs = operation.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ for (auto input : inputs)
+ operands().at(input).insertUse(index);
+ for (auto output : outputs)
+ operands().at(output).setDef(index);
+}
+
+OperationIndex Graph::addOperation(std::unique_ptr<Operation> &&operation)
+{
+ const Operation &op_ref = *operation;
+ if (!checkOperandsForOperation(op_ref))
+ return OperationIndex{};
+ auto ind = _operations.push(std::move(operation));
+ if (ind.valid())
+ linkOperandToOperation(ind, op_ref);
+ return ind;
+}
+
+OperationIndex Graph::addOperation(OperationIndex index, std::unique_ptr<Operation> &&operation)
+{
+ const Operation &op_ref = *operation;
+ if (!checkOperandsForOperation(op_ref))
+ return OperationIndex{};
+ auto ind_gen = _operations.push(std::move(operation), index);
+ if (ind_gen.valid())
+ {
+ assert(ind_gen == index);
+ linkOperandToOperation(index, op_ref);
+ }
+ return index;
}
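// Editor's standalone sketch (simplified containers, not the onert API) of the
// "validate operands, then push the node and wire use/def links" flow that the new
// Graph::addOperation overloads above implement. All names here are illustrative.
#include <cassert>
#include <cstdint>
#include <set>
#include <unordered_map>
#include <vector>

using OperandIndex = uint32_t;
using OperationIndex = uint32_t;

struct Operand
{
  std::set<OperationIndex> uses;
  OperationIndex def = UINT32_MAX;
};

struct Operation
{
  std::vector<OperandIndex> inputs;
  std::vector<OperandIndex> outputs;
};

struct MiniGraph
{
  std::unordered_map<OperandIndex, Operand> operands;
  std::unordered_map<OperationIndex, Operation> operations;
  OperationIndex next_op = 0;

  bool operandsExist(const Operation &op) const
  {
    for (auto in : op.inputs)
      if (!operands.count(in))
        return false;
    for (auto out : op.outputs)
      if (!operands.count(out))
        return false;
    return true;
  }

  // Returns UINT32_MAX (an "invalid" index) when an operand is missing,
  // mirroring the early return of an invalid OperationIndex above.
  OperationIndex addOperation(Operation op)
  {
    if (!operandsExist(op))
      return UINT32_MAX;
    const OperationIndex ind = next_op++;
    for (auto in : op.inputs)
      operands[in].uses.insert(ind); // record each input's user
    for (auto out : op.outputs)
      operands[out].def = ind;       // record each output's defining op
    operations.emplace(ind, std::move(op));
    return ind;
  }
};

int main()
{
  MiniGraph g;
  g.operands[0] = {};
  g.operands[1] = {};
  assert(g.addOperation({{0}, {1}}) == 0);          // valid: use/def links are set
  assert(g.addOperation({{2}, {1}}) == UINT32_MAX); // rejected: operand 2 does not exist
  assert(g.operands[0].uses.count(0) == 1);
  assert(g.operands[1].def == 0);
  return 0;
}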
void Graph::setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data)
{
- assert(isBuildingPhase());
assert(_operands.exist(ind));
_operands.at(ind).data(std::move(data));
}
void Graph::addInput(const OperandIndex &ind, const std::string &name)
{
- assert(isBuildingPhase());
if (!name.empty())
_name_to_input.emplace(name, IOIndex{_inputs.size()});
_inputs.append(ind);
void Graph::addOutput(const OperandIndex &ind, const std::string &name)
{
- assert(isBuildingPhase());
if (!name.empty())
_name_to_output.emplace(name, IOIndex{_outputs.size()});
_outputs.append(ind);
return (itr == _name_to_output.end()) ? IOIndex{} : itr->second;
}
-void Graph::finishBuilding(void)
+void Graph::verify(void)
{
- assert(isBuildingPhase());
- _phase = Phase::MODEL;
-
- initializeUseDef();
- sweepGarbageOperands();
-
// Call graph verifications for the MODEL phase
{
// Except for edge consistency, the user might have been given a bad model
throw std::runtime_error{"One of model input and output operands does not exist."};
if (!verifier::DAGChecker().verify(*this))
throw std::runtime_error{"The graph is cyclic."};
- assert(verifier::EdgeConsistencyChecker().verify(*this));
+ assert(verifier::EdgeChecker().verify(*this));
}
// Check shape independent operation feature
});
}
-void Graph::sweepGarbageOperands()
+std::vector<ir::OperationIndex> Graph::topolSortOperations() const
{
- // Remove operands that are not used by any operations, except Graph inputs/outputs
- ir::OperandIndexMap<bool> visited;
-
- operations().iterate([&](const OperationIndex &, const Operation &node) {
- for (auto ind : node.getInputs() + node.getOutputs())
+ std::vector<ir::OperationIndex> ret;
+ util::Set<ir::OperationIndex> unvisited;
+ operations().iterate(
+ [&](const ir::OperationIndex &index, const ir::Operation &) { unvisited.add(index); });
+
+ std::function<void(const ir::OperationIndex &, const ir::Operation &)> dfs =
+ [&](const ir::OperationIndex &index, const ir::Operation &op) -> void {
+ if (!unvisited.contains(index))
+ return;
+ unvisited.remove(index);
+
+ for (const auto output : op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
- visited[ind] = true;
+ const auto &operand = operands().at(output);
+ for (const auto &use : operand.getUses())
+ {
+ dfs(use, operations().at(use));
+ }
}
- });
-
- // Graph's inputs/outputs are always reachable
- for (auto ind : getInputs() + getOutputs())
- {
- visited[ind] = true;
- }
-
- operands().iterate([&](const OperandIndex &ind, const Operand &) {
- if (!visited[ind])
- {
- VERBOSE(Graph::sweepGarbageOperands) << "Sweep garbage operand " << ind.value() << std::endl;
- operands().remove(ind);
- }
- });
+ ret.push_back(index);
+ };
+ operations().iterate(dfs);
+
+ assert(unvisited.empty()); // All of the nodes must have been visited
+ // Reversing Postorder DFS result to make it sorted in topological order
+ std::reverse(ret.begin(), ret.end());
+ return ret;
}
} // namespace ir
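// Editor's standalone sketch (plain adjacency lists, not the onert API) of the
// post-order DFS + reverse that topolSortOperations() above relies on.
#include <algorithm>
#include <cassert>
#include <functional>
#include <vector>

std::vector<int> topolSort(const std::vector<std::vector<int>> &successors)
{
  std::vector<bool> visited(successors.size(), false);
  std::vector<int> ret;
  std::function<void(int)> dfs = [&](int n) {
    if (visited[n])
      return;
    visited[n] = true;
    for (int succ : successors[n])
      dfs(succ);
    ret.push_back(n); // post-order: a node is emitted only after all of its users
  };
  for (int n = 0; n < static_cast<int>(successors.size()); ++n)
    dfs(n);
  std::reverse(ret.begin(), ret.end()); // reverse post-order == topological order
  return ret;
}

int main()
{
  // 0 -> 1 -> 2 and 0 -> 2
  const std::vector<std::vector<int>> g{{1, 2}, {2}, {}};
  const auto order = topolSort(g);
  assert((order == std::vector<int>{0, 1, 2}));
  return 0;
}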
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GraphIterator.h"
-
-#include "ir/OperationIndexMap.h"
-#include "compiler/LoweredGraph.h"
-
-namespace onert
-{
-namespace ir
-{
-
-//
-// Graph::DefaultIterator
-//
-
-template <bool is_const>
-void DefaultIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const
-{
- graph.operations().iterate(
- [&](const OperationIndex &index, NodeRef node) -> void { fn(index, node); });
-}
-
-//
-// Graph::PostDfsIterator
-//
-
-template <bool is_const>
-void PostDfsIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const
-{
- assert(!graph.isBuildingPhase()); // Restrict iteration condition
-
- OperationIndexMap<bool> visited;
- graph.operations().iterate([&](const OperationIndex &index, NodeRef) { visited[index] = false; });
-
- std::function<void(const OperationIndex &, NodeRef)> dfs_recursive =
- [&](const OperationIndex &index, NodeRef node) -> void {
- if (visited[index])
- return;
- visited[index] = true;
-
- for (const auto output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED)
- {
- const auto &operand = graph.operands().at(output);
- for (const auto &use : operand.getUses())
- {
- dfs_recursive(use, graph.operations().at(use));
- }
- }
-
- fn(index, node);
- };
-
- graph.operations().iterate(dfs_recursive);
-
- // All of the operations(nodes) must have been visited.
- assert(std::all_of(visited.begin(), visited.end(),
- [](const std::pair<const OperationIndex, bool> &v) { return v.second; }));
-}
-
-template <bool is_const>
-void PostDfsIterator<is_const>::iterateOpSeqs(LoweredGraphRef lowered_graph,
- const OpSeqIterFn &fn) const
-{
- std::unordered_map<OpSequenceIndex, bool> visited;
- lowered_graph.op_seqs().iterate(
- [&](const OpSequenceIndex &index, OpSequenceRef) { visited[index] = false; });
-
- std::function<void(const OpSequenceIndex &, OpSequenceRef)> dfs_recursive =
- [&](const OpSequenceIndex &index, OpSequenceRef op_seq) -> void {
- if (visited[index])
- return;
- visited[index] = true;
-
- for (const auto output : op_seq.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED)
- {
- const auto &operand = lowered_graph.graph().operands().at(output);
- for (const auto &use : operand.getUses())
- {
- const auto use_op_seq_index = lowered_graph.op_seqs().getOperation(use);
- dfs_recursive(use_op_seq_index, lowered_graph.op_seqs().at(use_op_seq_index));
- }
- }
-
- fn(index, op_seq);
- };
-
- lowered_graph.op_seqs().iterate(dfs_recursive);
-
- // All of the operations(nodes) must have been visited.
- assert(std::all_of(visited.begin(), visited.end(),
- [](const std::pair<const OpSequenceIndex, bool> &v) { return v.second; }));
-}
-
-// Explicit instantiations to have implementation in the source file.
-// NOTE If these instatiations were in the top of this file, `iterate` is compiled and saved in
-// `GraphIterator.cc.o` but `iterateOpSeqs`. This happens only when cross-building for Android.
-// (Maybe a bug of NDK toolchain(clang)?)
-
-template class DefaultIterator<true>;
-template class DefaultIterator<false>;
-
-template class PostDfsIterator<true>;
-template class PostDfsIterator<false>;
-
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_GRAPH_ITERATOR_H__
-#define __ONERT_IR_GRAPH_ITERATOR_H__
-
-#include <type_traits>
-
-#include "ir/Index.h"
-
-namespace onert
-{
-namespace compiler
-{
-class LoweredGraph;
-} // namespace compiler
-} // namespace onert
-
-namespace onert
-{
-namespace ir
-{
-
-class Graph;
-class Operation;
-class OpSequence;
-
-template <bool is_const> class Iterator
-{
-public:
- using GraphRef = typename std::conditional<is_const, const Graph &, Graph &>::type;
- using IndexRef = const OperationIndex &;
- using NodeRef = typename std::conditional<is_const, const Operation &, Operation &>::type;
- using IterFn = std::function<void(IndexRef, NodeRef)>;
-
-public:
- virtual ~Iterator() = default;
- virtual void iterate(GraphRef graph, const IterFn &fn) const = 0;
-};
-
-template <bool is_const = false> class DefaultIterator final : public Iterator<is_const>
-{
-public:
- using GraphRef = typename Iterator<is_const>::GraphRef;
- using IndexRef = typename Iterator<is_const>::IndexRef;
- using NodeRef = typename Iterator<is_const>::NodeRef;
- using IterFn = typename Iterator<is_const>::IterFn;
-
-public:
- void iterate(GraphRef graph, const IterFn &fn) const;
-};
-using DefaultConstIterator = DefaultIterator<true>;
-
-template <bool is_const = false> class PostDfsIterator final : public Iterator<is_const>
-{
-public:
- using GraphRef = typename Iterator<is_const>::GraphRef;
- using IndexRef = typename Iterator<is_const>::IndexRef;
- using NodeRef = typename Iterator<is_const>::NodeRef;
- using IterFn = typename Iterator<is_const>::IterFn;
- using LoweredGraphRef =
- typename std::conditional<is_const, const typename compiler::LoweredGraph &,
- typename compiler::LoweredGraph &>::type;
- using OpSequenceRef = typename std::conditional<is_const, const OpSequence &, OpSequence &>::type;
- using OpSeqIndexRef = const OpSequenceIndex &;
- using OpSeqIterFn = std::function<void(OpSeqIndexRef, OpSequenceRef)>;
-
-public:
- void iterate(GraphRef graph, const IterFn &fn) const;
- void iterateOpSeqs(LoweredGraphRef lowered_graph, const OpSeqIterFn &f) const;
-};
-using PostDfsConstIterator = PostDfsIterator<true>;
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_GRAPH_ITERATOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/OpSequence.h"
-
-#include "ir/Operations.h"
-#include "ir/OperationVisitor.h"
-#include <sstream>
-
-namespace
-{
-
-std::string getStrFromIndice(const onert::ir::OperandIndexSequence &indice)
-{
- std::string str;
- for (const auto &ind : indice)
- {
- str += std::to_string(ind.value());
- str.push_back(',');
- }
- if (str.back() == ',')
- str.pop_back();
-
- return str;
-}
-}
-
-namespace onert
-{
-namespace ir
-{
-
-OpSequence::OpSequence(Layout layout) : _layout{layout}, _has_dynamic_tensor{false}
-{
- // DO NOTHING
-}
-
-void OpSequence::accept(OperationVisitor &v) const { v.visit(*this); }
-
-// TODO: Impl Dumper instead of this method
-std::string getStrFromOpSeq(const OpSequence &op_seq, const Operations &operations)
-{
- // " OpSequence IN(0,1,2) -> { op0(0,1,2:3), op1(3:4), op2(4:5) } -> OUT(5)"
- std::stringstream ss;
- ss << " OpSequence IN(" << getStrFromIndice(op_seq.getInputs()) << ") -> {";
- for (const auto &op_idx : op_seq)
- {
- ss << " " << op_idx.value() << "(" << operations.at(op_idx).name() << ":"
- << getStrFromIndice(operations.at(op_idx).getInputs()) << ":"
- << getStrFromIndice(operations.at(op_idx).getOutputs()) << ")";
- }
- ss << " } -> OUT(" << getStrFromIndice(op_seq.getOutputs()) << ")";
- return ss.str();
-}
-
-void OpSequence::remove(const OperationIndex &index)
-{
- assert(exist(index));
- for (auto it = _operations.cbegin(); it != _operations.cend(); ++it)
- {
- if (*it == index)
- {
- _operations.erase(it);
- break;
- }
- }
-}
-
-bool OpSequence::exist(const OperationIndex &index) const
-{
- for (const auto &inner_op_idx : _operations)
- {
- if (inner_op_idx == index)
- {
- return true;
- }
- }
- return false;
-}
-
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/OpSequences.h"
-#include "util/logging.h"
-#include <memory>
-
-#include <cassert>
-#include <string>
-
-namespace onert
-{
-namespace ir
-{
-
-OpSequenceIndex OpSequences::emplace(const OperationIndex &index, Layout layout)
-{
- std::unique_ptr<OpSequence> op_seq = std::make_unique<OpSequence>(layout);
- op_seq->appendOperation(index);
- const OpSequenceIndex &seq_index = push(std::move(op_seq));
- cacheSequenceIndex(seq_index, index);
- return seq_index;
-}
-
-OpSequenceIndex OpSequences::emplace(std::unique_ptr<OpSequence> &&op_seq)
-{
- auto &operations = op_seq->operations();
- const OpSequenceIndex &seq_index = push(std::move(op_seq));
- for (const auto &op_idx : operations)
- {
- cacheSequenceIndex(seq_index, op_idx);
- }
- return seq_index;
-}
-
-void OpSequences::cacheSequenceIndex(const OpSequenceIndex &seq_index,
- const OperationIndex &op_index) const
-{
- _seq_indexes.emplace(op_index, seq_index);
-}
-
-OpSequenceIndex *OpSequences::findSequenceIndex(const OperationIndex &operation_index) const
-{
- // If opration_index is cached, return sequence_index from cache
- if (_seq_indexes.count(operation_index))
- {
- auto &op_seq_index = _seq_indexes.at(operation_index);
- if (_objects.count(op_seq_index) && _objects.at(op_seq_index)->exist(operation_index))
- {
- return &op_seq_index;
- }
- else
- {
- _seq_indexes.erase(operation_index);
- return nullptr;
- }
- }
- return nullptr;
-}
-
-bool OpSequences::containsOperation(const OperationIndex &operation_index) const
-{
- return findOperation(operation_index).valid();
-}
-
-OpSequenceIndex OpSequences::getOperation(const OperationIndex &operation_index) const
-{
- OpSequenceIndex ret = findOperation(operation_index);
- assert(ret.valid());
- return ret;
-}
-
-void OpSequences::removeFromOpSequence(const OperationIndex &operation_index)
-{
- const auto op_seq_index = findOperation(operation_index);
- auto &op_seq = at(op_seq_index);
- _seq_indexes.erase(operation_index);
- op_seq.remove(operation_index);
- if (op_seq.size() == 0)
- {
- remove(op_seq_index);
- }
-}
-
-OpSequenceIndex OpSequences::findOperation(const OperationIndex &operation_index) const
-{
- if (OpSequenceIndex *op_seq_index = findSequenceIndex(operation_index))
- return *op_seq_index;
-
- for (auto &e : _objects)
- {
- OpSequence &object = *e.second;
- auto it = find(object.operations().begin(), object.operations().end(), operation_index);
- if (it != object.operations().end())
- {
- cacheSequenceIndex(e.first, operation_index);
- return e.first;
- }
- }
- throw std::runtime_error("Operation not found");
-}
-
-void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations)
-{
- op_seqs.iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) {
- VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl;
- });
-}
-
-} // namespace ir
-} // namespace onert
void Operand::unsetDef() { _def = OperationIndex{}; }
+void Operand::clearDefUse()
+{
+ unsetDef();
+ _uses.clear();
+}
+
} // namespace ir
} // namespace onert
return ret;
}
-std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &op_seq)
+std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &operand_seq)
{
std::string delimeter;
- for (const auto &ind : op_seq._vec)
+ for (const auto &ind : operand_seq._vec)
{
o << delimeter << ind;
delimeter = ',';
obj.iterate([&](const OperandIndex &index, const Operand &operand) {
_objects.emplace(index, std::make_unique<Operand>(operand));
});
- _index_count = obj._index_count;
+ _next_index = obj._next_index;
}
} // namespace ir
Operation::Operation(OperandConstraint input_constr, const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, OperandConstraint output_constr)
- : _input_constr{input_constr}, _output_constr{output_constr}
+ : _input_constr{input_constr}, _output_constr{output_constr}
{
setInputs(inputs);
setOutputs(outputs);
}
Operation::Operation(OperandConstraint input_constr, OperandConstraint output_constr)
- : _input_constr{input_constr}, _output_constr{output_constr}
+ : _input_constr{input_constr}, _output_constr{output_constr}
{
}
namespace ir
{
+namespace
+{
+
+class OperationCloner : public OperationVisitor
+{
+public:
+#define OP(Name) void visit(const operation::Name &o) override;
+#include "ir/Operations.lst"
+#undef OP
+
+public:
+ std::unique_ptr<Operation> releaseClone();
+
+private:
+ std::unique_ptr<Operation> _return_op;
+};
+
#define OP(Name) \
void OperationCloner::visit(const operation::Name &o) \
{ \
return std::move(_return_op);
}
+} // namespace
+
+std::unique_ptr<Operation> clone(const Operation &operation)
+{
+ OperationCloner cloner;
+ operation.accept(cloner);
+ return cloner.releaseClone();
+}
+
} // namespace ir
} // namespace onert
namespace ir
{
-class OperationCloner : public OperationVisitor
-{
-public:
-#define OP(Name) void visit(const operation::Name &o) override;
-#include "ir/Operations.lst"
-#undef OP
-
-public:
- std::unique_ptr<Operation> releaseClone();
-
-private:
- std::unique_ptr<Operation> _return_op;
-};
+std::unique_ptr<Operation> clone(const Operation &operation);
} // namespace ir
} // namespace onert
void OperationDumper::visit(const BatchToSpaceND &node)
{
std::string block_size =
- "BlockSize(" +
- std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) + ")";
+ "BlockSize(" + std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) +
+ ")";
dumpUnaryInputOp(node, block_size);
}
void OperationDumper::visit(const Conv2D &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
dumpConvOp(node, padding_type);
}
void OperationDumper::visit(const DepthwiseConv2D &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
dumpConvOp(node, padding_type);
}
void OperationDumper::visit(const ExpandDims &node)
{
std::string axis =
- "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")";
+ "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")";
dumpUnaryInputOp(node, axis);
}
void OperationDumper::visit(const FullyConnected &node)
{
std::string inputs =
- "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) +
- ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")";
+ "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) +
+ ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")";
dumpUnaryInputOp(node, inputs);
}
void OperationDumper::visit(const Gather &node)
{
std::string indices =
- "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")";
+ "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")";
dumpUnaryInputOp(node, indices);
}
void OperationDumper::visit(const InstanceNorm &node)
{
std::string inputs =
- "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) +
- ") Beta(" + std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")";
+ "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) + ") Beta(" +
+ std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")";
dumpUnaryInputOp(node, inputs);
}
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR)
- << " - Inputs : Input(" << node.getInputs().at(LSTM::Input::INPUT)
- << ") Input To Input Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS)
- << ") Input To Forget Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_FORGET_WEIGHTS)
- << ") Input To Cell Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_CELL_WEIGHTS)
- << ") Input To Output Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)
- << ") Recurrent To Input Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)
- << ") Recurrent To Forget Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)
- << ") Recurrent To Cell Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)
- << ") Recurrent To Output Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS) << ") Cell To Input Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_INPUT_WEIGHTS) << ") Cell To Forget Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_FORGET_WEIGHTS) << ") Cell To OUTPUT Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_OUTPUT_WEIGHTS) << ") Input Gate Bias("
- << node.getInputs().at(LSTM::Input::INPUT_GATE_BIAS) << ") Forget Gate Bias("
- << node.getInputs().at(LSTM::Input::FORGET_GATE_BIAS) << ") Cell Bias("
- << node.getInputs().at(LSTM::Input::CELL_BIAS) << ") Output Gate Bias("
- << node.getInputs().at(LSTM::Input::OUTPUT_GATE_BIAS) << ") Projection Weights("
- << node.getInputs().at(LSTM::Input::PROJECTION_WEIGHTS) << ") Projection Bias("
- << node.getInputs().at(LSTM::Input::PROJECTION_BIAS) << ") Output State In("
- << node.getInputs().at(LSTM::Input::OUTPUT_STATE_IN) << ") Cell State In("
- << node.getInputs().at(LSTM::Input::CELL_STATE_IN);
+ << " - Inputs : Input(" << node.getInputs().at(LSTM::Input::INPUT)
+ << ") Input To Input Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS)
+ << ") Input To Forget Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_FORGET_WEIGHTS)
+ << ") Input To Cell Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_CELL_WEIGHTS)
+ << ") Input To Output Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)
+ << ") Recurrent To Input Weights("
+ << node.getInputs().at(LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)
+ << ") Recurrent To Forget Weights("
+ << node.getInputs().at(LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)
+ << ") Recurrent To Cell Weights(" << node.getInputs().at(LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)
+ << ") Recurrent To Output Weights("
+ << node.getInputs().at(LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS) << ") Cell To Input Weights("
+ << node.getInputs().at(LSTM::Input::CELL_TO_INPUT_WEIGHTS) << ") Cell To Forget Weights("
+ << node.getInputs().at(LSTM::Input::CELL_TO_FORGET_WEIGHTS) << ") Cell To OUTPUT Weights("
+ << node.getInputs().at(LSTM::Input::CELL_TO_OUTPUT_WEIGHTS) << ") Input Gate Bias("
+ << node.getInputs().at(LSTM::Input::INPUT_GATE_BIAS) << ") Forget Gate Bias("
+ << node.getInputs().at(LSTM::Input::FORGET_GATE_BIAS) << ") Cell Bias("
+ << node.getInputs().at(LSTM::Input::CELL_BIAS) << ") Output Gate Bias("
+ << node.getInputs().at(LSTM::Input::OUTPUT_GATE_BIAS) << ") Projection Weights("
+ << node.getInputs().at(LSTM::Input::PROJECTION_WEIGHTS) << ") Projection Bias("
+ << node.getInputs().at(LSTM::Input::PROJECTION_BIAS) << ") Output State In("
+ << node.getInputs().at(LSTM::Input::OUTPUT_STATE_IN) << ") Cell State In("
+ << node.getInputs().at(LSTM::Input::CELL_STATE_IN);
if (node.getInputs().size() == 24)
{
VERBOSE(LIR) << ") Input Layer Normalization Weights("
void OperationDumper::visit(const Pool2D &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl;
VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Pool2D::Input::INPUT) << ")"
<< std::endl;
void OperationDumper::visit(const PReLU &node)
{
std::string alpha =
- "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")";
+ "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")";
dumpUnaryInputOp(node, alpha);
}
{
// optional param
std::string shape =
- node.getInputs().size() == 2
- ? "Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")"
- : "Shape(not provided)";
+ node.getInputs().size() == 2
+ ? "Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")"
+ : "Shape(not provided)";
dumpUnaryInputOp(node, shape);
}
void OperationDumper::visit(const Reverse &node)
{
std::string axis =
- "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")";
+ "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")";
dumpUnaryInputOp(node, axis);
}
void OperationDumper::visit(const SpaceToBatchND &node)
{
std::string inputs =
- "BlockSize(" +
- std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) +
- ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) +
- ")";
+ "BlockSize(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) +
+ ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) +
+ ")";
dumpUnaryInputOp(node, inputs);
}
void OperationDumper::visit(const Tile &node)
{
std::string multiples =
- "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")";
+ "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")";
dumpUnaryInputOp(node, multiples);
}
void OperationDumper::visit(const TransposeConv &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
VERBOSE(LIR) << "* TransposeConv(" << padding_type << ")" << std::endl;
VERBOSE(LIR) << " - Inputs : Output Shape("
<< node.getInputs().at(TransposeConv::Input::OUTPUT_SHAPE) << ") KERNEL("
#include "OperationValidator.h"
#include "ir/Graph.h"
+#include "util/logging.h"
#define OP_REQUIRES(EXP) \
do \
{
OperationValidator::OperationValidator(const Graph &graph)
- : _operations{graph.operations()}, _operands{graph.operands()}
+ : _operations{graph.operations()}, _operands{graph.operands()}
{
}
if (_operands.at(idx1).typeInfo().scale() != _operands.at(idx2).typeInfo().scale())
return false;
- if (_operands.at(idx1).typeInfo().offset() != _operands.at(idx2).typeInfo().offset())
+ if (_operands.at(idx1).typeInfo().zero_point() != _operands.at(idx2).typeInfo().zero_point())
return false;
return true;
void OperationValidator::visit(const operation::Conv2D &node)
{
const auto input_index{node.getInputs().at(operation::Conv2D::Input::INPUT)};
+ const auto kernel_index{node.getInputs().at(operation::Conv2D::Input::KERNEL)};
const auto output_index{node.getOutputs().at(0)};
uint32_t stride_horizontal = node.param().stride.horizontal;
OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0));
OP_REQUIRES((dilation_width > 0) && (dilation_height > 0));
OP_REQUIRES(isSameType(input_index, output_index));
+
+ if (isConstant(kernel_index) && operandType(kernel_index) == DataType::QUANT_INT8_ASYMM)
+ {
+ for (const auto zeropoint : _operands.at(kernel_index).typeInfo().zero_points())
+ OP_REQUIRES(zeropoint == 0);
+ }
}
void OperationValidator::visit(const operation::DepthToSpace &node)
void OperationValidator::visit(const operation::DepthwiseConv2D &node)
{
const auto input_index{node.getInputs().at(operation::DepthwiseConv2D::Input::INPUT)};
+ const auto kernel_index{node.getInputs().at(operation::DepthwiseConv2D::Input::KERNEL)};
const auto output_index{node.getOutputs().at(0)};
uint32_t stride_horizontal = node.param().stride.horizontal;
OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0));
OP_REQUIRES((dilation_width > 0) && (dilation_height > 0));
OP_REQUIRES(isSameType(input_index, output_index));
+
+ if (isConstant(kernel_index) && operandType(kernel_index) == DataType::QUANT_INT8_ASYMM)
+ {
+ for (const auto zeropoint : _operands.at(kernel_index).typeInfo().zero_points())
+ OP_REQUIRES(zeropoint == 0);
+ }
}
void OperationValidator::visit(const operation::ElementwiseActivation &node)
break;
case operation::ElementwiseActivation::Type::LEAKY_RELU:
OP_REQUIRES(
- isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
- DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
break;
case operation::ElementwiseActivation::Type::LOGISTIC:
OP_REQUIRES(
- isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
- DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
break;
case operation::ElementwiseActivation::Type::RELU:
- OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
- DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isValidType(
+ input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
break;
case operation::ElementwiseActivation::Type::TANH:
OP_REQUIRES(
- isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
- DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
break;
}
}
}
else if (node.param().op_type == operation::ElementwiseUnary::Type::QUANTIZE)
{
- OP_REQUIRES(isValidType(input_index, DataType::FLOAT32));
- OP_REQUIRES(isValidType(output_index, DataType::QUANT_UINT8_ASYMM));
+ OP_REQUIRES(isValidType(
+ input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(
+ isValidType(output_index, {DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
}
else if (node.param().op_type == operation::ElementwiseUnary::Type::FLOOR)
{
// TFLite: Allow hybrid type - value table & output
// NNAPI: Require same value table and output type
OP_REQUIRES(
- isSameType(values_index, output_index) ||
- (isValidType(output_index, DataType::FLOAT32) &&
- (isValidType(values_index, {DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT8_SYMM}))));
+ isSameType(values_index, output_index) ||
+ (isValidType(output_index, DataType::FLOAT32) &&
+ (isValidType(values_index, {DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT8_SYMM}))));
}
void OperationValidator::visit(const operation::ExpandDims &node)
void OperationValidator::visit(const operation::Pad &node)
{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::Pad::Input::INPUT)};
const auto pad_index{node.getInputs().at(operation::Pad::Input::PAD)};
+ bool isQuantType =
+ isValidType(output_index, {DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM});
+ bool isPadV2 = node.getInputs().size() == 3 ? true : false;
OP_REQUIRES(isValidType(pad_index, DataType::INT32));
+ OP_REQUIRES(isSameType(input_index, output_index));
+
+ if (isQuantType)
+ OP_REQUIRES(isSameQuantParam(input_index, output_index));
+
+ if (isPadV2)
+ {
+ const auto value_index{node.getInputs().at(operation::Pad::Input::VALUE)};
+ const bool cond_same = isSameType(input_index, value_index);
+ const bool cond_same_quant = (!isQuantType || isSameQuantParam(input_index, value_index));
+ const auto input_t = operandType(input_index);
+ const auto value_t = operandType(value_index);
+ // NNAPI accepts this case. scale and zeroPoint are assumed to be the same as in input0.
+ const bool cond_quant8 =
+ ((input_t == DataType::QUANT_UINT8_ASYMM || input_t == DataType::QUANT_INT8_ASYMM) &&
+ value_t == DataType::INT32);
+ OP_REQUIRES((cond_same && cond_same_quant) || cond_quant8);
+ }
}
void OperationValidator::visit(const operation::Rank &node)
OP_REQUIRES(isValidType(output_index, {DataType::UINT32, DataType::INT32, DataType::INT64}));
}
+void OperationValidator::visit(const operation::Slice &node)
+{
+ const auto begins_index{node.getInputs().at(operation::Slice::BEGINS)};
+ const auto sizes_index{node.getInputs().at(operation::Slice::SIZES)};
+
+ OP_REQUIRES(isValidType(begins_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isSameType(begins_index, sizes_index));
+}
+
+void OperationValidator::visit(const operation::Softmax &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::Softmax::INPUT)};
+
+ OP_REQUIRES(isSameType(input_index, output_index));
+ OP_REQUIRES(isValidType(
+ output_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+}
+
void OperationValidator::visit(const operation::SpaceToBatchND &node)
{
const auto block_size_index{node.getInputs().at(operation::SpaceToBatchND::Input::BLOCK_SIZE)};
OP_REQUIRES(isSameType(lhs_index, rhs_index));
}
+void OperationValidator::visit(const operation::StatelessRandomUniform &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto shape_index{node.getInputs().at(operation::StatelessRandomUniform::Input::SHAPE)};
+ const auto seed_index{node.getInputs().at(operation::StatelessRandomUniform::Input::SEED)};
+
+ OP_REQUIRES(isValidType(output_index, DataType::FLOAT32));
+ OP_REQUIRES(isValidType(shape_index, DataType::INT32));
+ OP_REQUIRES(isValidType(seed_index, DataType::INT32));
+}
+
void OperationValidator::visit(const operation::StridedSlice &node)
{
const auto output_index{node.getOutputs().at(0)};
OP_REQUIRES(node.getInputs().size() == node.getOutputs().size());
}
-} // namespace compiler
+} // namespace ir
} // namespace onert
#define __ONERT_IR_OPERATION_VALIDATOR_H__
#include "ir/OperationVisitor.h"
+#include "ir/Operations.h"
+#include "ir/Operands.h"
namespace onert
{
void visit(const operation::Reverse &node) override;
void visit(const operation::Select &node) override;
void visit(const operation::Shape &node) override;
+ void visit(const operation::Slice &node) override;
+ void visit(const operation::Softmax &node) override;
void visit(const operation::SpaceToBatchND &node) override;
void visit(const operation::SpaceToDepth &node) override;
void visit(const operation::Split &node) override;
void visit(const operation::SquaredDifference &node) override;
+ void visit(const operation::StatelessRandomUniform &node) override;
void visit(const operation::StridedSlice &node) override;
void visit(const operation::TransposeConv &node) override;
void visit(const operation::Unpack &node) override;
Operations::Operations(const Operations &obj)
{
- obj.iterate([&](const OperationIndex &index, const Operation &op) {
- OperationCloner cloner;
- op.accept(cloner);
- _objects.emplace(index, cloner.releaseClone());
- });
- _index_count = obj._index_count;
+ obj.iterate(
+ [&](const OperationIndex &index, const Operation &op) { _objects.emplace(index, clone(op)); });
+ _next_index = obj._next_index;
}
} // namespace ir
const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
const int32_t horizontal_expected_output =
- (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
+ (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
const int32_t vertical_needed_input =
- (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size;
+ (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size;
const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H);
const int32_t horizontal_needed_input =
- (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size;
+ (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size;
const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W);
padding.top = vertical_total_padding / 2;
{
const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
const int32_t horizontal_expected_output =
- (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
+ (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
assert(vertical_expected_output == ofm_shape.H);
assert(horizontal_expected_output == ofm_shape.W);
}
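// Editor's worked example (hypothetical sizes, not part of this patch) of the SAME-padding
// arithmetic above: expected_output = ceil(ifm / stride), needed_input =
// (expected_output - 1) * stride + filter, total_padding = max(0, needed_input - ifm).
#include <algorithm>
#include <cassert>

int main()
{
  const int ifm_h = 224, stride = 2, filter_h = 3;
  const int expected_output = (ifm_h + stride - 1) / stride;          // 112
  const int needed_input = (expected_output - 1) * stride + filter_h; // 225
  const int total_padding = std::max(0, needed_input - ifm_h);        // 1
  const int pad_top = total_padding / 2;                              // 0
  // The remainder is assumed to go to the bottom edge, as is conventional for SAME padding.
  const int pad_bottom = total_padding - pad_top;                     // 1
  assert(expected_output == 112 && total_padding == 1 && pad_top == 0 && pad_bottom == 1);
  return 0;
}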
Padding::Padding(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom)
- : type{PaddingType::EXPLICIT}, param{left, right, top, bottom}
+ : type{PaddingType::EXPLICIT}, param{left, right, top, bottom}
{
// DO NOTHING
}
std::multiplies<uint64_t>());
}
-Shape permuteShape(const Shape &shape, Layout frontend_layout, Layout backend_layout)
+Shape permuteShape(const Shape &shape, Layout from, Layout to)
{
assert(shape.rank() <= Shape::MAX_RANK);
- Shape backend_shape{shape};
- if (shape.rank() >= 4 && frontend_layout == Layout::NHWC && backend_layout == Layout::NCHW)
+ Shape ret{shape};
+ if (from == to)
+ return ret;
+ if (shape.rank() < 4)
+ return ret;
+ // Permutation changing layout beyond 4-D is not supported yet
+ assert(shape.rank() <= 4);
+ if (from == Layout::NHWC && to == Layout::NCHW)
{
- // Permutation changing layout beyond 4-D is not supported yet
- assert(shape.rank() <= 4);
- backend_shape.dim(1) = shape.dim(3);
- backend_shape.dim(2) = shape.dim(1);
- backend_shape.dim(3) = shape.dim(2);
+ ret.dim(1) = shape.dim(3);
+ ret.dim(2) = shape.dim(1);
+ ret.dim(3) = shape.dim(2);
}
- else if (shape.rank() >= 4 && frontend_layout == Layout::NCHW && backend_layout == Layout::NHWC)
+ else if (from == Layout::NCHW && to == Layout::NHWC)
{
- // Permutation changing layout beyond 4-D is not supported yet
- assert(shape.rank() <= 4);
- backend_shape.dim(1) = shape.dim(2);
- backend_shape.dim(2) = shape.dim(3);
- backend_shape.dim(3) = shape.dim(1);
+ ret.dim(1) = shape.dim(2);
+ ret.dim(2) = shape.dim(3);
+ ret.dim(3) = shape.dim(1);
}
- return backend_shape;
+ // Other cases (either `from` or `to` is UNKNOWN): just return the original shape
+ return ret;
}
} // namespace ir
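// Editor's minimal sketch (plain arrays, not the onert Shape class) of the NHWC -> NCHW
// dimension permutation performed by permuteShape() above, with hypothetical sizes.
#include <array>
#include <cassert>

int main()
{
  const std::array<int, 4> nhwc{1, 224, 224, 3}; // N, H, W, C
  std::array<int, 4> nchw = nhwc;                // dim(0) (N) stays in place
  nchw[1] = nhwc[3];                             // C
  nchw[2] = nhwc[1];                             // H
  nchw[3] = nhwc[2];                             // W
  assert((nchw == std::array<int, 4>{1, 3, 224, 224}));
  return 0;
}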
return false;
}
- if (lhs.offset() != rhs.offset())
+ if (lhs.zero_point() != rhs.zero_point())
{
return false;
}
void AddN::accept(OperationVisitor &v) const { v.visit(*this); }
AddN::AddN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(inputs.size()), inputs, outputs}
+ : Operation{OperandConstraint::createExact(inputs.size()), inputs, outputs}
{
}
ArgMinMax::ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
BCQFullyConnected::BCQFullyConnected(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
{
}
BCQGather::BCQGather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
{
}
BatchMatMul::BatchMatMul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
BatchToSpaceND::BatchToSpaceND(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
{
}
BinaryArithmetic::BinaryArithmetic(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
{
using ArithmeticType = onert::ir::operation::BinaryArithmetic::ArithmeticType;
static const std::unordered_map<ArithmeticType, std::string> name_map{
- {ArithmeticType::ADD, std::string{"Add"}},
- {ArithmeticType::SUB, std::string{"Sub"}},
- {ArithmeticType::MUL, std::string{"Mul"}},
- {ArithmeticType::DIV, std::string{"Div"}}};
+ {ArithmeticType::ADD, std::string{"Add"}},
+ {ArithmeticType::SUB, std::string{"Sub"}},
+ {ArithmeticType::MUL, std::string{"Mul"}},
+ {ArithmeticType::DIV, std::string{"Div"}}};
return name_map.at(_param.arithmetic_type);
}
void BroadcastTo::accept(OperationVisitor &v) const { v.visit(*this); }
BroadcastTo::BroadcastTo(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
Comparison::Comparison(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
Concat::Concat(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
{
}
Conv2D::Conv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
ConvertFp16ToFp32::ConvertFp16ToFp32(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
ConvertFp32ToFp16::ConvertFp32ToFp16(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
Custom::Custom(OperandConstraint input_constr, const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, std::string id, const Userdata &userdata)
- : Operation{input_constr, inputs, outputs}, _id(std::move(id)), _userdata(userdata)
+ : Operation{input_constr, inputs, outputs}, _id(std::move(id)), _userdata(userdata)
{
}
DepthToSpace::DepthToSpace(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
DepthwiseConv2D::DepthwiseConv2D(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
Einsum::Einsum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
{
}
ElementwiseActivation::ElementwiseActivation(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
if (param.op_type == Type::LOGISTIC)
{
- assert(param.alpha == 0.0f && param.beta == 0.0f && "Logistic will be supported only as "
- "sigmoid function(L=1, k=1, x0=0). So, do "
- "not use alpha and beta");
+ assert(param.alpha == 0.0f && param.beta == 0.0f &&
+ "Logistic will be supported only as "
+ "sigmoid function(L=1, k=1, x0=0). So, do "
+ "not use alpha and beta");
}
else if (param.op_type == Type::RELU)
{
}
else if (param.op_type == Type::TANH)
{
- assert(param.alpha == 1.0f && param.beta == 1.0f && "f(x) = alpha * tanh(beta * x), Tanh is "
- "supported only the values of alpha and "
- "beta are 1.f");
+ assert(param.alpha == 1.0f && param.beta == 1.0f &&
+ "f(x) = alpha * tanh(beta * x), Tanh is "
+ "supported only the values of alpha and "
+ "beta are 1.f");
}
}
{
using ElementwiseActivationType = onert::ir::operation::ElementwiseActivation::Type;
static const std::unordered_map<Type, std::string> name_map{
- {ElementwiseActivationType::ELU, "ELU"},
- {ElementwiseActivationType::LOGISTIC, "Logistic"},
- {ElementwiseActivationType::RELU, "ReLU"},
- {ElementwiseActivationType::TANH, "Tanh"},
- {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}};
+ {ElementwiseActivationType::ELU, "ELU"},
+ {ElementwiseActivationType::LOGISTIC, "Logistic"},
+ {ElementwiseActivationType::RELU, "ReLU"},
+ {ElementwiseActivationType::TANH, "Tanh"},
+ {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}};
return name_map.at(_param.op_type);
}
ElementwiseBinary::ElementwiseBinary(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
{
using ElementwiseBinaryType = onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType;
static const std::unordered_map<ElementwiseBinaryType, std::string> name_map{
- {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}},
- {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}},
- {ElementwiseBinaryType::MAX, std::string{"Max"}},
- {ElementwiseBinaryType::MIN, std::string{"Min"}}};
+ {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}},
+ {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}},
+ {ElementwiseBinaryType::MAX, std::string{"Max"}},
+ {ElementwiseBinaryType::MIN, std::string{"Min"}}};
return name_map.at(_param.op_type);
}
ElementwiseUnary::ElementwiseUnary(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs,
- OperandConstraint::createExact(1u)},
- _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs,
+ OperandConstraint::createExact(1u)},
+ _param{param}
{
}
{
using ElementwiseUnaryType = onert::ir::operation::ElementwiseUnary::Type;
static const std::unordered_map<ElementwiseUnaryType, std::string> name_map{
- {ElementwiseUnaryType::ABS, std::string{"Abs"}},
- {ElementwiseUnaryType::CAST, std::string{"Cast"}},
- {ElementwiseUnaryType::COS, std::string{"Cos"}},
- {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}},
- {ElementwiseUnaryType::ERF, std::string{"Erf"}},
- {ElementwiseUnaryType::EXP, std::string{"Exp"}},
- {ElementwiseUnaryType::FLOOR, std::string{"Floor"}},
- {ElementwiseUnaryType::LOG, std::string{"Log"}},
- {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}},
- {ElementwiseUnaryType::NEG, std::string{"Neg"}},
- {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}},
- {ElementwiseUnaryType::ROUND, std::string{"Round"}},
- {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}},
- {ElementwiseUnaryType::SIN, std::string{"Sin"}},
- {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}},
- {ElementwiseUnaryType::SQUARE, std::string{"Square"}},
- {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}};
+ {ElementwiseUnaryType::ABS, std::string{"Abs"}},
+ {ElementwiseUnaryType::CAST, std::string{"Cast"}},
+ {ElementwiseUnaryType::COS, std::string{"Cos"}},
+ {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}},
+ {ElementwiseUnaryType::ERF, std::string{"Erf"}},
+ {ElementwiseUnaryType::EXP, std::string{"Exp"}},
+ {ElementwiseUnaryType::FLOOR, std::string{"Floor"}},
+ {ElementwiseUnaryType::LOG, std::string{"Log"}},
+ {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}},
+ {ElementwiseUnaryType::NEG, std::string{"Neg"}},
+ {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}},
+ {ElementwiseUnaryType::ROUND, std::string{"Round"}},
+ {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}},
+ {ElementwiseUnaryType::SIN, std::string{"Sin"}},
+ {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}},
+ {ElementwiseUnaryType::SQUARE, std::string{"Square"}},
+ {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}};
return name_map.at(_param.op_type);
}
EmbeddingLookup::EmbeddingLookup(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
void ExpandDims::accept(OperationVisitor &v) const { v.visit(*this); }
ExpandDims::ExpandDims(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
void Fill::accept(OperationVisitor &v) const { v.visit(*this); }
Fill::Fill(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
FullyConnected::FullyConnected(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}, _param{param}
{
}
FusedBatchNorm::FusedBatchNorm(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createAtLeast(5u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(5u), inputs, outputs}, _param{param}
{
}
Gather::Gather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
HashtableLookup::HashtableLookup(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
{
void If::accept(OperationVisitor &v) const { v.visit(*this); }
If::If(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
{
}
} // namespace operation
InstanceNorm::InstanceNorm(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
L2Normalization::L2Normalization(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
LSTM::LSTM(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createInRange(20u, 24u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(20u, 24u), inputs, outputs}, _param{param}
{
}
LocalResponseNormalization::LocalResponseNormalization(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
LogSoftmax::LogSoftmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LowerInfo.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-LowerInfo::LowerInfo(const backend::Backend *backend, Layout layout)
- : _permute_factor{backend, layout}
-{
- // DO NOTHING
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
MatrixBandPart::MatrixBandPart(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
OneHot::OneHot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
{
}
void PReLU::accept(OperationVisitor &v) const { v.visit(*this); }
PReLU::PReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
void Pack::accept(OperationVisitor &v) const { v.visit(*this); }
Pack::Pack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
{
}
} // namespace operation
// PAD: 2 inputs
// PADV2: 3 inputs
Pad::Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
{
}
void Permute::accept(OperationVisitor &v) const { v.visit(*this); }
Permute::Permute(const OperandIndex &input, const OperandIndex &output, Type type)
- : Operation{OperandConstraint::createExact(1u)}, _type{type}
+ : Operation{OperandConstraint::createExact(1u)}, _type{type}
{
setInputs({input});
setOutputs({output});
Pool2D::Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
{
using PoolType = onert::ir::operation::Pool2D::PoolType;
static const std::unordered_map<PoolType, std::string> name_map{
- {PoolType::AVG, "Avg" + std::string{toString(opcode())}},
- {PoolType::L2, "L2" + std::string{toString(opcode())}},
- {PoolType::MAX, "Max" + std::string{toString(opcode())}}};
+ {PoolType::AVG, "Avg" + std::string{toString(opcode())}},
+ {PoolType::L2, "L2" + std::string{toString(opcode())}},
+ {PoolType::MAX, "Max" + std::string{toString(opcode())}}};
return name_map.at(_param.op_type);
}
void Pow::accept(OperationVisitor &v) const { v.visit(*this); }
Pow::Pow(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
RNN::RNN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
{
}
void Range::accept(OperationVisitor &v) const { v.visit(*this); }
Range::Range(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
void Rank::accept(OperationVisitor &v) const { v.visit(*this); }
Rank::Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
Reduce::Reduce(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
{
using ReduceType = onert::ir::operation::Reduce::ReduceType;
static const std::unordered_map<ReduceType, std::string> name_map{
- {ReduceType::ALL, std::string{toString(opcode())} + "All"},
- {ReduceType::ANY, std::string{toString(opcode())} + "Any"},
- {ReduceType::MAX, std::string{toString(opcode())} + "Max"},
- {ReduceType::MEAN, std::string{toString(opcode())} + "Mean"},
- {ReduceType::MIN, std::string{toString(opcode())} + "Min"},
- {ReduceType::PROD, std::string{toString(opcode())} + "Prod"},
- {ReduceType::SUM, std::string{toString(opcode())} + "SUM"}};
+ {ReduceType::ALL, std::string{toString(opcode())} + "All"},
+ {ReduceType::ANY, std::string{toString(opcode())} + "Any"},
+ {ReduceType::MAX, std::string{toString(opcode())} + "Max"},
+ {ReduceType::MEAN, std::string{toString(opcode())} + "Mean"},
+ {ReduceType::MIN, std::string{toString(opcode())} + "Min"},
+ {ReduceType::PROD, std::string{toString(opcode())} + "Prod"},
+ {ReduceType::SUM, std::string{toString(opcode())} + "SUM"}};
return name_map.at(_param.reduce_type);
// return std::string(toString(opcode())) + reduce_type_str_map.at(_param.reduce_type);
}
Reshape::Reshape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param(param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param(param)
{
}
ResizeBilinear::ResizeBilinear(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param}
{
}
ResizeNearestNeighbor::ResizeNearestNeighbor(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param}
{
}
void Reverse::accept(OperationVisitor &v) const { v.visit(*this); }
Reverse::Reverse(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
void Select::accept(OperationVisitor &v) const { v.visit(*this); }
Select::Select(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
void Shape::accept(OperationVisitor &v) const { v.visit(*this); }
Shape::Shape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
void Slice::accept(OperationVisitor &v) const { v.visit(*this); }
Slice::Slice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
Softmax::Softmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
SpaceToBatchND::SpaceToBatchND(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
SpaceToDepth::SpaceToDepth(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
void Split::accept(OperationVisitor &v) const { v.visit(*this); }
Split::Split(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
} // namespace operation
void SplitV::accept(OperationVisitor &v) const { v.visit(*this); }
SplitV::SplitV(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
} // namespace operation
SquaredDifference::SquaredDifference(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
Squeeze::Squeeze(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param(param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param(param)
{
}
StatelessRandomUniform::StatelessRandomUniform(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
StridedSlice::StridedSlice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
{
}
void Tile::accept(OperationVisitor &v) const { v.visit(*this); }
Tile::Tile(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
TopKV2::TopKV2(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
void Transpose::accept(OperationVisitor &v) const { v.visit(*this); }
Transpose::Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
TransposeConv::TransposeConv(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
void Unpack::accept(OperationVisitor &v) const { v.visit(*this); }
Unpack::Unpack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
} // namespace operation
void While::accept(OperationVisitor &v) const { v.visit(*this); }
While::While(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
{
}
} // namespace operation
OperationIndexMap<bool> visited;
operations.iterate(
- [&](const OperationIndex &index, const Operation &) { visited[index] = false; });
+ [&](const OperationIndex &index, const Operation &) { visited[index] = false; });
OperationIndexMap<bool> on_stack = visited; // Copy from visited
std::function<void(const OperationIndex &index, const Operation &)> dfs_recursive =
- [&](const OperationIndex &index, const Operation &node) -> void {
+ [&](const OperationIndex &index, const Operation &node) -> void {
if (on_stack[index])
cyclic = true;
if (visited[index])
// EdgeConsistencyVerifier
//
-bool EdgeConsistencyChecker::verify(const Graph &graph) const noexcept
+bool EdgeChecker::verify(const Graph &graph) const noexcept
{
auto &operations = graph.operations();
uint32_t errors = 0;
bool operand_has_use = operand.getUses().contains(index);
if (!operand_has_use)
{
- VERBOSE(EdgeConsistencyChecker) << "[ERROR] EDGE MISMATCH : Missing USE edge - Operand "
- << operand_index << " to Operation " << index
- << std::endl;
+ VERBOSE(EdgeChecker) << "[ERROR] EDGE MISMATCH : Missing USE edge - Operand "
+ << operand_index << " to Operation " << index << std::endl;
errors += 1;
}
}
catch (const std::out_of_range &e)
{
- VERBOSE(EdgeConsistencyChecker)
- << "[ERROR] OPEARAND NOT FOUND : Operation " << index << " has Operand "
- << operand_index << ", but the operand object is not present in the graph" << std::endl;
+      VERBOSE(EdgeChecker) << "[ERROR] OPERAND NOT FOUND : Operation " << index
+ << " has Operand " << operand_index
+ << ", but the operand object is not present in the graph" << std::endl;
errors += 1;
}
}
auto &operand = graph.operands().at(operand_index);
if (operand.getDef() != index)
{
- VERBOSE(EdgeConsistencyChecker) << "[ERROR] EDGE MISMATCH : Missing DEF edge - Operand"
- << operand_index << " to Operation " << index
- << std::endl;
+ VERBOSE(EdgeChecker) << "[ERROR] EDGE MISMATCH : Missing DEF edge - Operand"
+ << operand_index << " to Operation " << index << std::endl;
errors += 1;
}
}
catch (const std::out_of_range &e)
{
- VERBOSE(EdgeConsistencyChecker)
- << "[ERROR] OPEARAND NOT FOUND : Operation " << index << " has Operand "
- << operand_index << ", but the operand object is not present in the graph" << std::endl;
+      VERBOSE(EdgeChecker) << "[ERROR] OPERAND NOT FOUND : Operation " << index
+ << " has Operand " << operand_index
+ << ", but the operand object is not present in the graph" << std::endl;
errors += 1;
}
}
});
- VERBOSE(EdgeConsistencyChecker) << "Total Number of errors : " << errors << std::endl;
+ VERBOSE(EdgeChecker) << "Total Number of errors : " << errors << std::endl;
return errors == 0;
}
bool verify(const Graph &graph) const noexcept override;
};
-class EdgeConsistencyChecker : public IVerifier
+class EdgeChecker : public IVerifier
{
public:
bool verify(const Graph &graph) const noexcept override;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/EventWriter.h"
+
+#include <sstream>
+#include <vector>
+#include <cassert>
+#include <utility>
+
+// json type for ChromeTracingWriter
+namespace
+{
+
+std::string quote(const std::string &value)
+{
+ std::stringstream ss;
+ ss << '"' << value << '"';
+ return ss.str();
+}
+
+std::string field(const std::string &k, const std::string &v)
+{
+ std::stringstream ss;
+ ss << quote(k) << " : " << quote(v);
+ return ss.str();
+}
+
+struct Content // One Entry in Chrome Event Trace
+{
+ std::vector<std::pair<std::string, std::string>> flds;
+ std::vector<std::pair<std::string, std::string>> args;
+};
+
+std::string object(const Content &content)
+{
+ std::stringstream ss;
+
+ ss << "{ ";
+
+ ss << field(content.flds[0].first, content.flds[0].second);
+
+ for (uint32_t n = 1; n < content.flds.size(); ++n)
+ {
+ ss << ", " << field(content.flds.at(n).first, content.flds.at(n).second);
+ }
+
+ if (content.args.size() > 0)
+ {
+ ss << ", " << quote("args") << " : { ";
+ ss << field(content.args.at(0).first, content.args.at(0).second);
+
+ for (uint32_t n = 1; n < content.args.size(); ++n)
+ {
+ ss << ", " << field(content.args.at(n).first, content.args.at(n).second);
+ }
+
+ ss << "}";
+ }
+
+ ss << " }";
+
+ return ss.str();
+}
+
+void fill(Content &content, const DurationEvent &evt, const std::string &name,
+ const std::string &tid)
+{
+ content.flds.emplace_back("name", name);
+ content.flds.emplace_back("pid", "0");
+ content.flds.emplace_back("tid", tid);
+ content.flds.emplace_back("ph", evt.ph);
+ content.flds.emplace_back("ts", evt.ts);
+ content.args = evt.args;
+}
+
+void fill(Content &content, const CounterEvent &evt)
+{
+ assert(evt.name != "");
+
+ content.flds.emplace_back("name", evt.name);
+ content.flds.emplace_back("pid", "0");
+ content.flds.emplace_back("tid", evt.tid);
+ content.flds.emplace_back("ph", evt.ph);
+ content.flds.emplace_back("ts", evt.ts);
+ content.args = evt.args;
+}
+
+std::string object(const DurationEvent &evt, const std::string &name, const std::string &tid)
+{
+ Content content;
+
+ fill(content, evt, name, tid);
+
+ return ::object(content);
+}
+
+std::string object(const CounterEvent &evt)
+{
+ Content content;
+
+ fill(content, evt);
+
+ for (auto it = evt.values.begin(); it != evt.values.end(); ++it)
+ {
+ content.args.emplace_back(it->first, it->second);
+ }
+
+ return ::object(content);
+}
+
+std::string getSessionLabel(const DurationEvent &evt)
+{
+ return "$" + std::to_string(evt.session_index) + " sess";
+}
+
+std::string getSubgLabel(const DurationEvent &evt)
+{
+ return "$" + std::to_string(evt.subg_index) + " subg";
+}
+
+std::string getOpLabel(const OpSeqDurationEvent &evt)
+{
+ return "@" + std::to_string(evt.op_index) + " " + evt.op_name;
+}
+
+std::string getLabel(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ {
+ return getOpLabel(*evt_ptr);
+ }
+ else // SubgDurationEvent
+ {
+ return getSubgLabel(evt);
+ }
+}
+
+std::string getTid(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ {
+ return getSessionLabel(*evt_ptr) + ", " + getSubgLabel(*evt_ptr) + ", " + evt_ptr->backend;
+ }
+ else // SubgDurationEvent
+ {
+ return getSessionLabel(evt) + ", " + getSubgLabel(evt);
+ }
+}
+
+} // namespace
+
+void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
+{
+ _os << "{\n";
+ _os << " " << quote("traceEvents") << ": [\n";
+
+ for (auto &recorder : recorders)
+ {
+ flushOneRecord(*recorder);
+ }
+
+ _os << " { }\n";
+ _os << " ]\n";
+ _os << "}\n";
+}
+
+void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder)
+{
+ for (auto &evt : recorder.duration_events())
+ {
+ const std::string name = getLabel(*evt);
+ const std::string tid = getTid(*evt);
+
+ _os << " " << object(*evt, name, tid) << ",\n";
+ }
+
+ for (auto &evt : recorder.counter_events())
+ {
+ _os << " " << object(evt) << ",\n";
+ }
+}
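+
+// Illustrative sketch of the output, assuming an OpSeqDurationEvent with ph="B",
+// ts="100", session_index=1, subg_index=0, backend="cpu", op_index=3,
+// op_name="Conv2D" and no user args:
+//
+//   getLabel(evt) -> "@3 Conv2D"
+//   getTid(evt)   -> "$1 sess, $0 subg, cpu"
+//   object(evt, getLabel(evt), getTid(evt))
+//     -> { "name" : "@3 Conv2D", "pid" : "0", "tid" : "$1 sess, $0 subg, cpu",
+//          "ph" : "B", "ts" : "100" }
+//
+// Each such object becomes one entry of the "traceEvents" array that flush() wraps.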
{
auto now = std::chrono::steady_clock::now();
return std::to_string(
- std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count());
+ std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count());
}
-class DurationEventBuilder
+class DurationEventBuilder : public EventCollector::EventVisitor
{
public:
DurationEventBuilder(const std::string &ts) : _ts{ts} {}
- DurationEvent build(const EventCollector::Event &evt_collected, const std::string &ph) const
+ std::unique_ptr<SubgDurationEvent> build(const EventCollector::SubgEvent &evt_collected,
+ const std::string &ph) const
{
- DurationEvent evt;
+ auto dur_evt = std::make_unique<SubgDurationEvent>();
- evt.name = evt_collected.label;
- evt.tid = evt_collected.backend;
- evt.ph = ph;
- evt.ts = _ts;
+    // The following will be set by a child of EventWriter:
+ // dur_evt.name, dur_evt.tid
+ dur_evt->ph = ph;
+ dur_evt->ts = _ts;
+ dur_evt->tracing_ctx = evt_collected.tracing_ctx;
- evt.args = evt_collected.userData;
+ dur_evt->session_index = evt_collected.session_index;
+ dur_evt->subg_index = evt_collected.subg_index;
- return evt;
+ dur_evt->args = evt_collected.userData;
+ {
+ dur_evt->args.emplace_back("session", std::to_string(evt_collected.session_index));
+ dur_evt->args.emplace_back("subgraph", std::to_string(evt_collected.subg_index));
+ }
+
+ return dur_evt;
+ }
+
+ std::unique_ptr<OpSeqDurationEvent> build(const EventCollector::OpSeqEvent &evt_collected,
+ const std::string &ph) const
+ {
+ auto dur_evt = std::make_unique<OpSeqDurationEvent>();
+
+    // The following will be set by a child of EventWriter:
+ // dur_evt.name, dur_evt.tid
+ dur_evt->ph = ph;
+ dur_evt->ts = _ts;
+ dur_evt->tracing_ctx = evt_collected.tracing_ctx;
+
+ dur_evt->session_index = evt_collected.session_index;
+ dur_evt->subg_index = evt_collected.subg_index;
+
+ dur_evt->backend = evt_collected.backend;
+ dur_evt->op_index = evt_collected.op_index;
+ dur_evt->op_name = evt_collected.op_name;
+
+ dur_evt->args = evt_collected.userData;
+ {
+ dur_evt->args.emplace_back("session", std::to_string(evt_collected.session_index));
+ dur_evt->args.emplace_back("subgraph", std::to_string(evt_collected.subg_index));
+ }
+
+ return dur_evt;
}
private:
} // namespace
-void EventCollector::onEvent(const Event &event)
+template <typename EventT> void EventCollector::onEvent(const EventT &event)
{
auto ts = timestamp();
+ DurationEventBuilder builder(ts);
+
switch (event.edge)
{
case Edge::BEGIN:
- _rec->emit(DurationEventBuilder(ts).build(event, "B"));
+ {
+ auto duration_evt = builder.build(event, "B");
+ _rec->emit(std::move(duration_evt));
break;
-
+ }
case Edge::END:
- _rec->emit(DurationEventBuilder(ts).build(event, "E"));
+ {
+ auto duration_evt = builder.build(event, "E");
+ _rec->emit(std::move(duration_evt));
break;
+ }
}
  // TODO: Add resource measurement (e.g. RSS)
emit_rusage(_rec, ts);
#endif
}
+
+// template instantiation
+template void EventCollector::onEvent<EventCollector::SubgEvent>(const SubgEvent &event);
+template void EventCollector::onEvent<EventCollector::OpSeqEvent>(const OpSeqEvent &event);
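+
+// Illustrative caller sketch (hypothetical; assumes a live TracingCtx *ctx and an
+// EventRecorder instance named recorder):
+//
+//   EventCollector collector{&recorder};
+//   collector.onEvent(EventCollector::SubgEvent{ctx, EventCollector::Edge::BEGIN, 0});
+//   collector.onEvent(
+//     EventCollector::OpSeqEvent{ctx, EventCollector::Edge::BEGIN, 0, "cpu", 3, "Conv2D"});
+//   collector.onEvent(
+//     EventCollector::OpSeqEvent{ctx, EventCollector::Edge::END, 0, "cpu", 3, "Conv2D"});
+//   collector.onEvent(EventCollector::SubgEvent{ctx, EventCollector::Edge::END, 0});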
#define __ONERT_UTIL_EVENT_COLLECTOR_H__
#include "util/EventRecorder.h"
+#include "util/TracingCtx.h"
#include <vector>
#include <utility>
END
};
+ struct SubgEvent;
+ struct OpEvent;
+
+ class EventVisitor
+ {
+ public:
+ virtual ~EventVisitor() = default;
+
+ virtual std::unique_ptr<DurationEvent> visit(const SubgEvent &, const std::string &) const
+ {
+ throw std::runtime_error("Please implement");
+ }
+ virtual std::unique_ptr<DurationEvent> visit(const OpEvent &, const std::string &) const
+ {
+ throw std::runtime_error("Please implement");
+ }
+ };
+
struct Event
{
+ const onert::util::TracingCtx *tracing_ctx;
+
Edge edge;
uint32_t session_index;
uint32_t subg_index;
- std::string backend;
- uint32_t op_index;
- std::string op_name;
- uint32_t op_seq_size; // if this event is for an operation sequence of multiple operations
-
- // TODO deprecate this. label can be differ by writer. So let the writer decide label.
- std::string label;
// user-defined data: pairs of (key, value)
std::vector<std::pair<std::string, std::string>> userData;
- Event(Edge a_edge, const std::string &a_backend, const std::string &a_label)
- : edge(a_edge), session_index(0), subg_index(0), backend(a_backend), op_index(0),
- op_seq_size(0), label(a_label)
+ protected:
+ Event(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index)
+ : tracing_ctx(a_tracing_ctx), edge(a_edge), session_index(tracing_ctx->getSessionId()),
+ subg_index(a_subg_index)
+ { /* empty */
+ }
+
+ virtual ~Event() = default;
+ };
+
+ struct SubgEvent : public Event
+ {
+ // constructor for subgraph start and end event
+ SubgEvent(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index)
+ : Event(a_tracing_ctx, a_edge, a_subg_index)
{ /* empty */
}
};
+ // TODO Rename this to OperationEvent
+ struct OpSeqEvent : public Event
+ {
+ std::string backend;
+ uint32_t op_index;
+ std::string op_name;
+
+ OpSeqEvent(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index,
+ const std::string a_backend, uint32_t a_op_index, const std::string a_op_name)
+ : Event(a_tracing_ctx, a_edge, a_subg_index)
+ {
+ backend.assign(a_backend);
+ op_index = a_op_index;
+ op_name.assign(a_op_name);
+ }
+ };
+
public:
EventCollector(EventRecorder *rec) : _rec{rec}
{
}
public:
- void onEvent(const Event &event);
+ template <typename EventT> void onEvent(const EventT &event);
protected:
EventRecorder *_rec;
#include "util/EventRecorder.h"
-void EventRecorder::emit(const DurationEvent &evt)
+void EventRecorder::emit(std::unique_ptr<DurationEvent> &&evt)
{
std::lock_guard<std::mutex> lock{_mu};
- _duration_events.push_back(evt);
+ _duration_events.push_back(std::move(evt));
}
void EventRecorder::emit(const CounterEvent &evt)
#ifndef __ONERT_UTIL_EVENT_RECORDER_H__
#define __ONERT_UTIL_EVENT_RECORDER_H__
+#include "util/TracingCtx.h"
+
#include <map>
#include <memory>
#include <mutex>
#include <vector>
+// refer to https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/edit#
struct Event
{
- std::string name;
- std::string tid;
- std::string ph; /* REQUIRED */
- std::string ts; /* REQUIRED */
+ const onert::util::TracingCtx *tracing_ctx;
+
+ std::string ph; // Event type.
+ std::string ts; // tracing clock of timestamp of this event
std::vector<std::pair<std::string, std::string>> args; // user-defined data: pairs of (key, value)
+
+ virtual ~Event() = default;
};
struct DurationEvent : public Event
{
- // TO BE FILLED
+ uint32_t session_index = 0;
+ uint32_t subg_index = 0;
+
+protected:
+ DurationEvent() = default;
+};
+
+struct SubgDurationEvent : public DurationEvent
+{ /* same with DurationEvent */
+};
+
+// TODO Rename it to OperationDurationEvent
+struct OpSeqDurationEvent : public DurationEvent
+{
+ // Note: DurationEvent's name and tid will be set by EventWriter
+ std::string backend;
+ uint32_t op_index;
+ std::string op_name;
};
struct CounterEvent : public Event
{
+ std::string name; // name of event
+ std::string tid; // thread ID
std::map<std::string, std::string> values;
};
EventRecorder() = default;
public:
- void emit(const DurationEvent &evt);
+ void emit(std::unique_ptr<DurationEvent> &&evt);
void emit(const CounterEvent &evt);
public:
- bool empty() { return _duration_events.empty() && _counter_events.empty(); }
- const std::vector<DurationEvent> &duration_events() const { return _duration_events; }
+ const std::vector<std::unique_ptr<DurationEvent>> &duration_events() const
+ {
+ return _duration_events;
+ }
const std::vector<CounterEvent> &counter_events() const { return _counter_events; }
private:
std::mutex _mu;
- std::vector<DurationEvent> _duration_events;
+ std::vector<std::unique_ptr<DurationEvent>> _duration_events;
std::vector<CounterEvent> _counter_events;
};
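+
+// Illustrative usage sketch (hypothetical; OpSeqDurationEvent is declared above):
+//   EventRecorder recorder;
+//   auto evt = std::make_unique<OpSeqDurationEvent>();
+//   evt->ph = "B";
+//   evt->ts = "100";
+//   recorder.emit(std::move(evt)); // the recorder takes ownership of the event
+//   // a writer later walks recorder.duration_events() to serialize each entry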
#include "util/EventWriter.h"
-#include <sstream>
-#include <vector>
-#include <unordered_map>
-#include <json/json.h>
-#include <assert.h>
-#include <utility>
-#include <map>
-#include <set>
-#include <stdint.h>
-#include <fstream>
-
-// json type for Chrome Event Trace
-namespace
-{
-
-std::string quote(const std::string &value)
-{
- std::stringstream ss;
- ss << '"' << value << '"';
- return ss.str();
-}
-
-std::string field(const std::string &k, const std::string &v)
-{
- std::stringstream ss;
- ss << quote(k) << " : " << quote(v);
- return ss.str();
-}
-
-struct Content // One Entry in Chrome Event Trace
-{
- std::vector<std::pair<std::string, std::string>> flds;
- std::vector<std::pair<std::string, std::string>> args;
-};
-
-std::string object(const Content &content)
-{
- std::stringstream ss;
-
- ss << "{ ";
-
- ss << field(content.flds[0].first, content.flds[0].second);
-
- for (uint32_t n = 1; n < content.flds.size(); ++n)
- {
- ss << ", " << field(content.flds.at(n).first, content.flds.at(n).second);
- }
-
- if (content.args.size() > 0)
- {
- ss << ", " << quote("args") << " : { ";
- ss << field(content.args.at(0).first, content.args.at(0).second);
-
- for (uint32_t n = 1; n < content.args.size(); ++n)
- {
- ss << ", " << field(content.args.at(n).first, content.args.at(n).second);
- }
-
- ss << "}";
- }
-
- ss << " }";
-
- return ss.str();
-}
-
-void fill(Content &content, const Event &evt)
-{
- content.flds.emplace_back("name", evt.name);
- content.flds.emplace_back("pid", "0");
- content.flds.emplace_back("tid", evt.tid);
- content.flds.emplace_back("ph", evt.ph);
- content.flds.emplace_back("ts", evt.ts);
- content.args = evt.args;
-}
-
-std::string object(const DurationEvent &evt)
-{
- Content content;
-
- fill(content, evt);
-
- return ::object(content);
-}
-
-std::string object(const CounterEvent &evt)
-{
- Content content;
-
- fill(content, evt);
-
- for (auto it = evt.values.begin(); it != evt.values.end(); ++it)
- {
- content.args.emplace_back(it->first, it->second);
- }
-
- return ::object(content);
-}
-
-} // namespace
-
-// md table type
-namespace
-{
-
-void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
-{
- os << "| ";
- for (auto &key : list)
- {
- os << key << " | ";
- }
- os << "\n";
-}
-
-struct MDContent
-{
- std::string name;
- uint64_t begin_ts;
- uint64_t end_ts;
- uint32_t min_rss;
- uint32_t max_rss;
- uint32_t min_page_reclaims;
- uint32_t max_page_reclaims;
-
- MDContent()
- : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX),
- max_page_reclaims(0)
- {
- // DO NOTHING
- }
-
- virtual ~MDContent() = default;
-
- void updateRss(uint32_t rss)
- {
- if (min_rss == UINT32_MAX)
- min_rss = rss;
- if (max_rss == 0)
- max_rss = rss;
-
- if (min_rss > rss)
- min_rss = rss;
- else if (max_rss < rss)
- max_rss = rss;
- }
-
- void updateMinflt(uint32_t minflt)
- {
- if (min_page_reclaims == UINT32_MAX)
- min_page_reclaims = minflt;
- if (max_page_reclaims == 0)
- max_page_reclaims = minflt;
-
- if (min_page_reclaims > minflt)
- min_page_reclaims = minflt;
- else if (max_page_reclaims < minflt)
- max_page_reclaims = minflt;
- }
-
- virtual void write(std::ostream &os) const = 0;
-};
-
-struct OpSeq : public MDContent
-{
- std::string backend;
- uint64_t graph_latency;
-
- struct OpSeqCmp
- {
- bool operator()(const OpSeq &lhs, const OpSeq &rhs) const
- {
- return lhs.begin_ts < rhs.begin_ts;
- }
- bool operator()(const OpSeq &lhs, const OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
- bool operator()(OpSeq &lhs, OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
- };
-
- void write(std::ostream &os) const override
- {
- uint64_t opseq_latency = end_ts - begin_ts;
- double opseq_per = static_cast<double>(opseq_latency) / graph_latency * 100.0;
- writeMDTableRow(os, {name, backend, std::to_string(opseq_latency), std::to_string(opseq_per),
- std::to_string(min_rss), std::to_string(max_rss),
- std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)});
- }
-};
-
-struct Graph : public MDContent
-{
- std::set<OpSeq, OpSeq::OpSeqCmp> opseqs;
-
- void setOpSeqs(const std::map<std::string, OpSeq> &name_to_opseq)
- {
- uint64_t graph_latency = end_ts - begin_ts;
- for (auto it : name_to_opseq)
- {
- auto opseq = it.second;
- opseq.graph_latency = graph_latency;
-
- opseqs.insert(opseq);
-
- updateRss(opseq.min_rss);
- updateRss(opseq.max_rss);
- updateMinflt(opseq.min_page_reclaims);
- updateMinflt(opseq.max_page_reclaims);
- }
- }
-
- void write(std::ostream &os) const override
- {
- static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)",
- "page_reclaims_min", "page_reclaims_max"};
-
- static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------",
- "-----------------", "-----------------"};
-
- // Graph's Header
- writeMDTableRow(os, graph_headers);
- writeMDTableRow(os, graph_headers_line);
-
- // Graph's contents
- writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss),
- std::to_string(max_rss), std::to_string(min_page_reclaims),
- std::to_string(max_page_reclaims)});
-
- os << "\n";
-
- static std::vector<std::string> opseq_headers{
- "OpSeq name", "backend", "latency(us)", "latency(%)",
- "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"};
-
- static std::vector<std::string> opseq_headers_line{
- "----------", "-------", "-----------", "-----------",
- "-------", "-------", "-----------------", "-----------------"};
-
- os << "## OpSequences \n";
-
- // OpSeq's Header
- writeMDTableRow(os, opseq_headers);
- writeMDTableRow(os, opseq_headers_line);
-
- // OpSeq's contents
- for (auto opseq : opseqs)
- {
- opseq.write(os);
- }
-
- os << "\n";
- }
-};
-
-struct MDTableBuilder
-{
- MDTableBuilder(const std::vector<DurationEvent> &duration_events,
- const std::vector<CounterEvent> &counter_events)
- : _duration_events(duration_events), _counter_events(counter_events)
- {
-// when ready with low overhead in release build
-#ifdef DEBUG
- for (const auto &evt : _counter_events)
- {
- uint64_t ts = std::stoull(evt.ts);
- auto &name = evt.name;
- assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0);
- assert(evt.values.size() == 1);
- auto &val = evt.values.begin()->second;
- if (_ts_to_values.find(ts) == _ts_to_values.end())
- {
- std::pair<uint32_t, uint32_t> values;
- if (name.compare("maxrss") == 0)
- values.first = std::stoul(val);
- else
- values.second = std::stoul(val);
- _ts_to_values.insert({ts, values});
- }
- else
- {
- auto &values = _ts_to_values.at(ts);
- if (name.compare("maxrss") == 0)
- values.first = std::stoul(val);
- else
- values.second = std::stoul(val);
- }
- }
-#endif
- }
-
- MDTableBuilder &build()
- {
- for (auto &it : divideGraph())
- {
- size_t begin_idx = it.first;
- size_t end_idx = it.second;
- std::map<std::string, OpSeq> name_to_opseq;
- for (size_t i = begin_idx + 1; i < end_idx; ++i)
- {
- const auto &evt = _duration_events[i];
- assert(evt.name.compare("Graph") != 0);
- assert(evt.ph.compare("B") == 0 || evt.ph.compare("E") == 0);
- if (evt.ph.compare("B") == 0)
- {
- assert(name_to_opseq.find(evt.name) == name_to_opseq.end());
- name_to_opseq.insert({evt.name, makeOpSeq(evt)});
- }
- else
- {
- assert(name_to_opseq.find(evt.name) != name_to_opseq.end());
- auto &opseq = name_to_opseq.at(evt.name);
- updateOpSeq(opseq, evt);
- }
- }
-
- _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_opseq));
- }
-
- return *this;
- }
-
- std::vector<std::pair<size_t, size_t>> divideGraph()
- {
- std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx>
- for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i)
- {
- const auto &evt = _duration_events.at(i);
- if (evt.name.compare("Graph") == 0)
- {
- if (evt.ph.compare("B") == 0)
- begin_idx = i;
- else
- graph_idx_list.emplace_back(begin_idx, i);
- }
- }
- return graph_idx_list;
- }
-
- OpSeq makeOpSeq(const DurationEvent &evt)
- {
- OpSeq opseq;
- opseq.name = evt.name;
- opseq.begin_ts = std::stoull(evt.ts);
- opseq.backend = evt.tid;
-#ifdef DEBUG
- opseq.updateRss(_ts_to_values.at(opseq.begin_ts).first);
- opseq.updateMinflt(_ts_to_values.at(opseq.begin_ts).second);
-#else
- opseq.updateRss(0);
- opseq.updateMinflt(0);
-#endif
- return opseq;
- }
-
- void updateOpSeq(OpSeq &opseq, const DurationEvent &evt)
- {
- opseq.end_ts = std::stoull(evt.ts);
-#ifdef DEBUG
- opseq.updateRss(_ts_to_values.at(opseq.end_ts).first);
- opseq.updateMinflt(_ts_to_values.at(opseq.end_ts).second);
-#else
- opseq.updateRss(0);
- opseq.updateMinflt(0);
-#endif
- }
-
- Graph makeGraph(size_t begin_idx, size_t end_idx,
- const std::map<std::string, OpSeq> &name_to_opseq)
- {
- Graph graph;
- graph.name = "Graph";
- graph.begin_ts = std::stoull(_duration_events[begin_idx].ts);
- graph.end_ts = std::stoull(_duration_events[end_idx].ts);
- graph.setOpSeqs(name_to_opseq);
-#ifdef DEBUG
- graph.updateRss(_ts_to_values.at(graph.begin_ts).first);
- graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second);
- graph.updateRss(_ts_to_values.at(graph.end_ts).first);
- graph.updateMinflt(_ts_to_values.at(graph.end_ts).second);
-#else
- graph.updateRss(0);
- graph.updateMinflt(0);
-#endif
- return graph;
- }
-
- void write(std::ostream &os)
- {
- // Write contents
- for (size_t i = 0; i < _graphs.size(); ++i)
- {
- os << "# Graph " << i << "\n";
- _graphs.at(i).write(os);
- }
- }
-
- const std::vector<DurationEvent> &_duration_events;
- const std::vector<CounterEvent> &_counter_events;
- // timestamp to std::pair<maxrss, minflt>
- std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values;
- std::vector<Graph> _graphs;
-};
-
-} // namespace
-
-void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
-{
- Json::Value root;
- auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue};
-
- struct Stat
- {
- uint64_t sum = 0;
- uint64_t count = 0;
- uint64_t max = 0;
- uint64_t min = std::numeric_limits<uint64_t>::max();
-
- void accumulate(uint64_t val)
- {
- sum += val;
- count++;
- max = std::max(max, val);
- min = std::min(min, val);
- }
- };
-
- // Memory
- {
- std::unordered_map<std::string, Stat> mem_stats;
- for (auto &recorder : recorders)
- {
- for (auto &evt : recorder->counter_events())
- {
- auto &mem_stat = mem_stats[evt.name];
- uint64_t val = std::stoull(evt.values.at("value"));
- mem_stat.accumulate(val);
- }
- }
-
- auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
- for (auto &kv : mem_stats)
- {
- auto &key = kv.first;
- auto &val = kv.second;
- mem[key]["Avg_Size"] = val.sum / val.count;
- mem[key]["Max_Size"] = val.max;
- mem[key]["Min_Size"] = val.min;
- mem[key]["Runtime"] = "NA";
- }
- }
-
- // Operation Execution Time
- {
- // NOTE This assumes _duration_events is sorted by "ts" ascending
-
- // 2D keys : stats[tid][name]
- std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
- std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
- for (auto &recorder : recorders)
- {
- for (auto &evt : recorder->duration_events())
- {
- auto &stat = stats[evt.tid][evt.name];
- auto &begin_ts = begin_timestamps[evt.tid][evt.name];
- uint64_t timestamp = std::stoull(evt.ts);
- if (evt.ph == "B")
- {
- if (begin_ts != 0)
- throw std::runtime_error{"Invalid Data"};
- begin_ts = timestamp;
- }
- else if (evt.ph == "E")
- {
- if (begin_ts == 0 || timestamp < begin_ts)
- throw std::runtime_error{"Invalid Data"};
- stat.accumulate(timestamp - begin_ts);
- begin_ts = 0;
- }
- else
- throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""};
- }
- }
-
- for (auto &kv : begin_timestamps)
- for (auto &kv2 : kv.second)
- if (kv2.second != 0)
- throw std::runtime_error{"Invalid Data - B and E pair does not match."};
-
- for (auto &kv : stats)
- {
- auto &tid = kv.first;
- auto &map = kv.second;
- auto &json_tid = exec_data[tid] = Json::Value{Json::objectValue};
- for (auto &kv : map)
- {
- auto &name = kv.first;
- auto &val = kv.second;
- json_tid[name]["Avg_Time"] = val.sum / val.count;
- json_tid[name]["Max_Time"] = val.max;
- json_tid[name]["Min_Time"] = val.min;
- json_tid[name]["Runtime"] = tid;
- }
- }
- }
-
- _os << root;
-}
-
-void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
-{
- _os << "{\n";
- _os << " " << quote("traceEvents") << ": [\n";
-
- for (auto &recorder : recorders)
- {
- flushOneRecord(*recorder);
- }
-
- _os << " { }\n";
- _os << " ]\n";
- _os << "}\n";
-}
-
-void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder)
-{
- for (auto &evt : recorder.duration_events())
- {
- _os << " " << object(evt) << ",\n";
- }
-
- for (auto &evt : recorder.counter_events())
- {
- _os << " " << object(evt) << ",\n";
- }
-}
-
-void MDTableWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &records)
-{
- for (auto &recorder : records)
- {
- MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os);
- }
-}
+#include <cassert>
// initialization
std::mutex EventWriter::_mutex;
{
public:
EventFormatWriter(const std::string &filepath) : _os{filepath, std::ofstream::out} {}
- virtual ~EventFormatWriter() { /* empty */}
+ virtual ~EventFormatWriter()
+ { /* empty */
+ }
virtual void flush(const std::vector<std::unique_ptr<EventRecorder>> &) = 0;
class SNPEWriter : public EventFormatWriter
{
public:
- SNPEWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */}
+ SNPEWriter(const std::string &filepath) : EventFormatWriter(filepath)
+ { /* empty */
+ }
+ ~SNPEWriter() {}
+
void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
};
class ChromeTracingWriter : public EventFormatWriter
{
public:
- ChromeTracingWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */}
+ ChromeTracingWriter(const std::string &filepath) : EventFormatWriter(filepath)
+ { /* empty */
+ }
+ ~ChromeTracingWriter() {}
+
void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
private:
class MDTableWriter : public EventFormatWriter
{
public:
- MDTableWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */}
- void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
+ MDTableWriter(const std::string &filepath) : EventFormatWriter(filepath)
+ { /* empty */
+ }
+ ~MDTableWriter() {}
-private:
- void flushOneRecord(const EventRecorder &);
+ void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
};
+#include <mutex>
+
class EventWriter
{
public:
_actual_writers[WriteFormat::SNPE_BENCHMARK] = std::make_unique<SNPEWriter>(snpe_log_name);
_actual_writers[WriteFormat::CHROME_TRACING] =
- std::make_unique<ChromeTracingWriter>(chrome_tracing_log_name);
+ std::make_unique<ChromeTracingWriter>(chrome_tracing_log_name);
_actual_writers[WriteFormat::MD_TABLE] = std::make_unique<MDTableWriter>(md_table_log_name);
};
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/EventWriter.h"
+
+#include <sstream>
+#include <vector>
+#include <unordered_map>
+#include <cassert>
+#include <utility>
+#include <map>
+#include <set>
+#include <stdint.h>
+
+// md table type
+namespace
+{
+
+void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
+{
+ os << "| ";
+ for (auto &key : list)
+ {
+ os << key << " | ";
+ }
+ os << "\n";
+}
+
+struct MDContent
+{
+ std::string name;
+ uint64_t begin_ts;
+ uint64_t end_ts;
+ uint32_t min_rss;
+ uint32_t max_rss;
+ uint32_t min_page_reclaims;
+ uint32_t max_page_reclaims;
+
+ MDContent()
+ : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX),
+ max_page_reclaims(0)
+ {
+ // DO NOTHING
+ }
+
+ virtual ~MDContent() = default;
+
+ void updateRss(uint32_t rss)
+ {
+ if (min_rss == UINT32_MAX)
+ min_rss = rss;
+ if (max_rss == 0)
+ max_rss = rss;
+
+ if (min_rss > rss)
+ min_rss = rss;
+ else if (max_rss < rss)
+ max_rss = rss;
+ }
+
+ void updateMinflt(uint32_t minflt)
+ {
+ if (min_page_reclaims == UINT32_MAX)
+ min_page_reclaims = minflt;
+ if (max_page_reclaims == 0)
+ max_page_reclaims = minflt;
+
+ if (min_page_reclaims > minflt)
+ min_page_reclaims = minflt;
+ else if (max_page_reclaims < minflt)
+ max_page_reclaims = minflt;
+ }
+
+ virtual void write(std::ostream &os) const = 0;
+};
+
+struct Operation : public MDContent
+{
+ std::string backend;
+ uint64_t graph_latency;
+
+ struct OperationCmp
+ {
+ bool operator()(const Operation &lhs, const Operation &rhs) const
+ {
+ return lhs.begin_ts < rhs.begin_ts;
+ }
+ bool operator()(const Operation &lhs, const Operation &rhs)
+ {
+ return lhs.begin_ts < rhs.begin_ts;
+ }
+ bool operator()(Operation &lhs, Operation &rhs) { return lhs.begin_ts < rhs.begin_ts; }
+ };
+
+ void write(std::ostream &os) const override
+ {
+ uint64_t op_latency = end_ts - begin_ts;
+ double op_per = static_cast<double>(op_latency) / graph_latency * 100.0;
+ writeMDTableRow(os, {name, backend, std::to_string(op_latency), std::to_string(op_per),
+ std::to_string(min_rss), std::to_string(max_rss),
+ std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)});
+ }
+};
+
+struct Graph : public MDContent
+{
+ std::set<Operation, Operation::OperationCmp> ops;
+ std::string session_index;
+ std::string subgraph_index;
+
+ void setOperations(const std::map<std::string, Operation> &name_to_op)
+ {
+ uint64_t graph_latency = end_ts - begin_ts;
+ for (auto it : name_to_op)
+ {
+ auto op = it.second;
+ op.graph_latency = graph_latency;
+
+ ops.insert(op);
+
+ updateRss(op.min_rss);
+ updateRss(op.max_rss);
+ updateMinflt(op.min_page_reclaims);
+ updateMinflt(op.max_page_reclaims);
+ }
+ }
+
+ void write(std::ostream &os) const override
+ {
+ static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)",
+ "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------",
+ "-----------------", "-----------------"};
+
+ // Graph's Header
+ writeMDTableRow(os, graph_headers);
+ writeMDTableRow(os, graph_headers_line);
+
+ // Graph's contents
+ writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss),
+ std::to_string(max_rss), std::to_string(min_page_reclaims),
+ std::to_string(max_page_reclaims)});
+
+ os << "\n";
+
+ static std::vector<std::string> op_headers{
+ "Op name", "backend", "latency(us)", "latency(%)",
+ "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> op_headers_line{
+ "-------", "-------", "-----------", "-----------",
+ "-------", "-------", "-----------------", "-----------------"};
+
+ os << "## Op \n";
+
+ // Operation's Header
+ writeMDTableRow(os, op_headers);
+ writeMDTableRow(os, op_headers_line);
+
+ // Operation's contents
+ for (auto op : ops)
+ {
+ op.write(os);
+ }
+
+ os << "\n";
+ }
+};
+
+std::string getLabel(const OpSeqDurationEvent &evt)
+{
+ std::string subg_label("$" + std::to_string(evt.subg_index) + " subgraph");
+ std::string op_label("@" + std::to_string(evt.op_index) + " " + evt.op_name);
+
+ return subg_label + " " + op_label;
+}
+
+struct MDTableBuilder
+{
+ MDTableBuilder(const std::vector<std::unique_ptr<DurationEvent>> &duration_events,
+ const std::vector<CounterEvent> &counter_events)
+ : _duration_events(duration_events), _counter_events(counter_events)
+ {
+// when ready with low overhead in release build
+#ifdef DEBUG
+ for (const auto &evt : _counter_events)
+ {
+ uint64_t ts = std::stoull(evt.ts);
+ auto &name = evt.name;
+ assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0);
+ assert(evt.values.size() == 1);
+ auto &val = evt.values.begin()->second;
+ if (_ts_to_values.find(ts) == _ts_to_values.end())
+ {
+ std::pair<uint32_t, uint32_t> values;
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ _ts_to_values.insert({ts, values});
+ }
+ else
+ {
+ auto &values = _ts_to_values.at(ts);
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ }
+ }
+#endif
+ }
+
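+  // Pairs each operation's "B"/"E" duration events within every subgraph run found by
+  // divideGraph() and aggregates the results into _graphs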
+ MDTableBuilder &build()
+ {
+ for (auto &it : divideGraph())
+ {
+ size_t begin_idx = it.first;
+ size_t end_idx = it.second;
+ std::map<std::string, Operation> name_to_op;
+ for (size_t i = begin_idx + 1; i < end_idx; ++i)
+ {
+ const auto *evt = dynamic_cast<const OpSeqDurationEvent *>(_duration_events[i].get());
+ if (evt == nullptr)
+ continue;
+
+ const std::string evt_name = getLabel(*evt);
+ assert(evt->ph.compare("B") == 0 || evt->ph.compare("E") == 0);
+ if (evt->ph.compare("B") == 0)
+ {
+ assert(name_to_op.find(evt_name) == name_to_op.end());
+ name_to_op.insert({evt_name, makeOperation(*evt)});
+ }
+ else
+ {
+ assert(name_to_op.find(evt_name) != name_to_op.end());
+ auto &op = name_to_op.at(evt_name);
+ updateOperation(op, *evt);
+ }
+ }
+
+ _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_op));
+ }
+
+ return *this;
+ }
+
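+  // Returns (begin_idx, end_idx) pairs of subgraph-level "B"/"E" duration events; each pair
+  // delimits one subgraph run in _duration_events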
+ std::vector<std::pair<size_t, size_t>> divideGraph()
+ {
+ std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx>
+ for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i)
+ {
+ const auto subg_evt = dynamic_cast<const SubgDurationEvent *>(_duration_events.at(i).get());
+ if (subg_evt == nullptr)
+ continue;
+
+ if (subg_evt->ph.compare("B") == 0)
+ begin_idx = i;
+ else
+ graph_idx_list.emplace_back(begin_idx, i);
+ }
+ return graph_idx_list;
+ }
+
+ Operation makeOperation(const OpSeqDurationEvent &evt)
+ {
+ Operation op;
+ const std::string &evt_name = getLabel(evt);
+ op.name = evt_name;
+ op.begin_ts = std::stoull(evt.ts);
+ op.backend = evt.backend;
+#ifdef DEBUG
+ op.updateRss(_ts_to_values.at(op.begin_ts).first);
+ op.updateMinflt(_ts_to_values.at(op.begin_ts).second);
+#else
+ op.updateRss(0);
+ op.updateMinflt(0);
+#endif
+ return op;
+ }
+
+ void updateOperation(Operation &op, const DurationEvent &evt)
+ {
+ op.end_ts = std::stoull(evt.ts);
+#ifdef DEBUG
+ op.updateRss(_ts_to_values.at(op.end_ts).first);
+ op.updateMinflt(_ts_to_values.at(op.end_ts).second);
+#else
+ op.updateRss(0);
+ op.updateMinflt(0);
+#endif
+ }
+
+ Graph makeGraph(size_t begin_idx, size_t end_idx,
+ const std::map<std::string, Operation> &name_to_op)
+ {
+ Graph graph;
+ graph.name = "Subgraph";
+ graph.begin_ts = std::stoull(_duration_events[begin_idx]->ts);
+ graph.end_ts = std::stoull(_duration_events[end_idx]->ts);
+ graph.setOperations(name_to_op);
+
+ for (auto &arg : _duration_events[end_idx]->args)
+ {
+ if (arg.first == "session")
+ graph.session_index = arg.second;
+ if (arg.first == "subgraph")
+ graph.subgraph_index = arg.second;
+ }
+
+#ifdef DEBUG
+ graph.updateRss(_ts_to_values.at(graph.begin_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second);
+ graph.updateRss(_ts_to_values.at(graph.end_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.end_ts).second);
+#else
+ graph.updateRss(0);
+ graph.updateMinflt(0);
+#endif
+ return graph;
+ }
+
+ void write(std::ostream &os)
+ {
+ // Write contents
+ for (size_t i = 0; i < _graphs.size(); ++i)
+ {
+ auto &graph = _graphs.at(i);
+ os << "# Session: " << graph.session_index << ", Subgraph: " << graph.subgraph_index
+ << ", Running count: " << i << "\n";
+ _graphs.at(i).write(os);
+ }
+ }
+
+ const std::vector<std::unique_ptr<DurationEvent>> &_duration_events;
+ const std::vector<CounterEvent> &_counter_events;
+
+ // timestamp to std::pair<maxrss, minflt>
+ std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values;
+ std::vector<Graph> _graphs;
+};
+
+} // namespace
+
+void MDTableWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &records)
+{
+ for (auto &recorder : records)
+ {
+ MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os);
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/EventWriter.h"
+
+#include <unordered_map>
+#include <json/json.h>
+#include <cassert>
+#include <utility>
+
+/**
+ * @brief Version of the SNPE format
+ * In version 1,
+ * - There is no "version" field in the Json
+ * - Only one subgraph is supported
+ * - Operation names have the form "$3 ADD"
+ *
+ * In version 2,
+ * - "version" : "2" was added to the Json
+ * - Multiple sessions and multiple subgraphs are supported
+ * - When there is only one session, operation names have the form "$2 subgraph $3 ADD",
+ *   meaning an ADD op whose operation index is 3, in a subgraph whose index is 2
+ * - When there are two or more sessions, operation names have the form
+ *   "$1 session $2 subgraph $3 ADD", meaning an ADD op whose operation index is 3,
+ *   in a subgraph whose index is 2, run in the 1st session
+ */
+#define SNPE_JSON_SCHEMA_VERSION "2"
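+
+// An illustrative sketch (values are made up) of the version-2 output that SNPEWriter::flush()
+// below produces; field names follow the code in this file:
+//
+// {
+//   "version" : "2",
+//   "Execution_Data" : {
+//     "memory" : {
+//       "maxrss" : {
+//         "Avg_Size" : 1024, "Max_Size" : 2048, "Min_Size" : 512, "Runtime" : "NA"
+//       }
+//     },
+//     "cpu" : {
+//       "$0 subgraph $3 ADD" : {
+//         "Avg_Time" : 100, "Max_Time" : 120, "Min_Time" : 90, "Runtime" : "cpu"
+//       }
+//     }
+//   }
+// }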
+
+namespace
+{
+
+std::string getLabel(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ {
+ std::string subg_label("$" + std::to_string(evt_ptr->subg_index) + " subgraph");
+ std::string op_label("$" + std::to_string(evt_ptr->op_index) + " " + evt_ptr->op_name);
+
+    // Note: At this moment, there is only one thread running for EventWriter
+ if (evt_ptr->tracing_ctx->hasMultipleSessions())
+ {
+ std::string session_label("$" + std::to_string(evt_ptr->session_index) + " session");
+ return session_label + " " + subg_label + " " + op_label;
+ }
+ else
+ {
+ // When there is only one session, do not include session info
+ // Refer to https://github.sec.samsung.net/STAR/nnfw/issues/11436#issuecomment-930332
+ return subg_label + " " + op_label;
+ }
+ }
+ else // SubgEvent
+ return "Graph";
+}
+
+std::string getBackend(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ return evt_ptr->backend;
+  else // SubgEvent
+ return "runtime";
+}
+
+} // namespace
+
+void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
+{
+ struct Stat
+ {
+ uint64_t sum = 0;
+ uint64_t count = 0;
+ uint64_t max = 0;
+ uint64_t min = std::numeric_limits<uint64_t>::max();
+
+ void accumulate(uint64_t val)
+ {
+ sum += val;
+ count++;
+ max = std::max(max, val);
+ min = std::min(min, val);
+ }
+ };
+
+ Json::Value root;
+ root["version"] = SNPE_JSON_SCHEMA_VERSION;
+
+ auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue};
+
+ // Memory
+ {
+ std::unordered_map<std::string, Stat> mem_stats;
+ for (auto &recorder : recorders)
+ {
+ for (auto &evt : recorder->counter_events())
+ {
+ auto &mem_stat = mem_stats[evt.name];
+ uint64_t val = std::stoull(evt.values.at("value"));
+ mem_stat.accumulate(val);
+ }
+ }
+
+ auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
+ for (auto &kv : mem_stats)
+ {
+ auto &key = kv.first;
+ auto &val = kv.second;
+ mem[key]["Avg_Size"] = val.sum / val.count;
+ mem[key]["Max_Size"] = val.max;
+ mem[key]["Min_Size"] = val.min;
+ mem[key]["Runtime"] = "NA";
+ }
+ }
+
+ // Operation Execution Time
+ {
+ // NOTE This assumes _duration_events is sorted by "ts" ascending
+
+ // 2D keys : stats[tid][name]
+ std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
+ std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
+ for (auto &recorder : recorders)
+ {
+ for (auto &evt : recorder->duration_events())
+ {
+ std::string evt_name = getLabel(*evt);
+ std::string evt_tid = getBackend(*evt);
+
+ auto &stat = stats[evt_tid][evt_name];
+ auto &begin_ts = begin_timestamps[evt_tid][evt_name];
+ uint64_t timestamp = std::stoull(evt->ts);
+ if (evt->ph == "B")
+ {
+ if (begin_ts != 0)
+ throw std::runtime_error{"Invalid Data"};
+ begin_ts = timestamp;
+ }
+ else if (evt->ph == "E")
+ {
+ if (begin_ts == 0 || timestamp < begin_ts)
+ throw std::runtime_error{"Invalid Data"};
+ stat.accumulate(timestamp - begin_ts);
+ begin_ts = 0;
+ }
+ else
+ throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt->ph + "\""};
+ }
+ }
+
+ for (auto &kv : begin_timestamps)
+ for (auto &kv2 : kv.second)
+ if (kv2.second != 0)
+ throw std::runtime_error{"Invalid Data - B and E pair does not match."};
+
+ for (auto &kv : stats)
+ {
+ auto &tid = kv.first;
+ auto &map = kv.second;
+ auto &json_tid = exec_data[tid] = Json::Value{Json::objectValue};
+      for (auto &kv2 : map)
+      {
+        auto &name = kv2.first;
+        auto &val = kv2.second;
+ json_tid[name]["Avg_Time"] = val.sum / val.count;
+ json_tid[name]["Max_Time"] = val.max;
+ json_tid[name]["Min_Time"] = val.min;
+ json_tid[name]["Runtime"] = tid;
+ }
+ }
+ }
+
+ _os << root;
+}
break;
case ir::PaddingType::EXPLICIT:
out_h =
- (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1;
+ (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1;
out_w =
- (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal +
- 1;
+ (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal + 1;
break;
default:
assert(false);
ir::Shape out_shape(static_cast<int>(1));
out_shape.dim(0) =
- (std::is_integral<T>::value
- ? ((std::abs(start_val - limit_val) + std::abs(delta_val) - 1) / std::abs(delta_val))
- : std::ceil(std::abs((start_val - limit_val) / delta_val)));
+ (std::is_integral<T>::value
+ ? ((std::abs(start_val - limit_val) + std::abs(delta_val) - 1) / std::abs(delta_val))
+ : std::ceil(std::abs((start_val - limit_val) / delta_val)));
return out_shape;
}
ir::Shape true_shape = input_true_shape;
ir::Shape false_shape = input_false_shape;
int most_rank =
- (cond_shape.rank() >= true_shape.rank()) && (cond_shape.rank() >= false_shape.rank())
- ? cond_shape.rank()
- : (false_shape.rank() >= true_shape.rank() ? false_shape.rank() : true_shape.rank());
+ (cond_shape.rank() >= true_shape.rank()) && (cond_shape.rank() >= false_shape.rank())
+ ? cond_shape.rank()
+ : (false_shape.rank() >= true_shape.rank() ? false_shape.rank() : true_shape.rank());
ir::Shape calculate_shape(most_rank);
for (int i = 0; i < most_rank; ++i)
{
calculate_shape.dim(i) =
- (cond_shape.dim(i) >= true_shape.dim(i)) && (cond_shape.dim(i) >= false_shape.dim(i))
- ? cond_shape.dim(i)
- : (false_shape.dim(i) >= true_shape.dim(i) ? false_shape.dim(i) : true_shape.dim(i));
+ (cond_shape.dim(i) >= true_shape.dim(i)) && (cond_shape.dim(i) >= false_shape.dim(i))
+ ? cond_shape.dim(i)
+ : (false_shape.dim(i) >= true_shape.dim(i) ? false_shape.dim(i) : true_shape.dim(i));
if ((cond_shape.dim(i) != calculate_shape.dim(i) && cond_shape.dim(i) != 1) ||
(true_shape.dim(i) != calculate_shape.dim(i) && true_shape.dim(i) != 1) ||
return new_shape;
}
-ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins_buf,
- const int32_t *sizes_buf)
+template <typename T>
+ir::Shape inferSliceShape(const ir::Shape &input_shape, const T *begins_buf, const T *sizes_buf)
{
const uint32_t rank = input_shape.rank();
ir::Shape out_shape(rank);
}
else
{
- if (input_dim < begin + size)
+ if (input_dim < static_cast<int32_t>(begin + size))
throw std::runtime_error("shape inference Slice: Invalid begin and size.");
}
- out_shape.dim(idx) = size;
+ out_shape.dim(idx) = static_cast<int32_t>(size);
}
return out_shape;
}
+// Explicit template instantiations for int32_t and int64_t begin/size buffers
+template ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins_buf,
+ const int32_t *sizes_buf);
+template ir::Shape inferSliceShape(const ir::Shape &input_shape, const int64_t *begins_buf,
+ const int64_t *sizes_buf);
ir::Shape inferSpaceToBatchNDShape(const ir::Shape &input_shape, const ir::Shape &block_shape_shape,
const ir::Shape &padding_shape, const int32_t *block_shape_buf,
for (int dim = 0; dim < kSpatialDimensionNum; ++dim)
{
int final_dim_size =
- (input_shape.dim(dim + 1) + padding_buf[dim * 2] + padding_buf[dim * 2 + 1]);
+ (input_shape.dim(dim + 1) + padding_buf[dim * 2] + padding_buf[dim * 2 + 1]);
assert(final_dim_size % block_shape_buf[dim] == 0);
if (!(current >= 0 && current < shape_rank && in_shape.dim(current) == 1))
{
throw std::runtime_error(
- "The following conditions must be met: 0 <= dim < Shape rank, dim == 1");
+ "The following conditions must be met: 0 <= dim < Shape rank, dim == 1");
}
if (!should_squeeze[current])
{
if (multiplier_size != in_shape.rank())
{
- throw std::runtime_error("inferTileShape failed, input rank: " +
- std::to_string(in_shape.rank()) + ", bad multipliers size: " +
- std::to_string(multiplier_size) + "");
+ throw std::runtime_error(
+ "inferTileShape failed, input rank: " + std::to_string(in_shape.rank()) +
+ ", bad multipliers size: " + std::to_string(multiplier_size) + "");
}
ir::Shape new_Shape(in_shape.rank());
// initializing static member var
std::mutex TracingCtx::_session_id_mutex;
+uint32_t TracingCtx::_next_session_id = 0;
} // namespace util
} // namespace onert
+++ /dev/null
-../../../.clang-format.8
\ No newline at end of file
*
* @param file_path
*/
- void loadFromFile(const char *file_path);
+ void loadFromFile(const std::string &file_path);
/**
* @brief Load a model from a buffer
*
// Create operands form tflite::Tensor
ir::OperandIndex loadOperand(const Tensor *tensor, ir::Graph &subg);
- void loadSparsity(const Tensor *tensor, const ir::Shape &shape, ir::TypeInfo &typeInfo);
+ void loadQuantization(const Tensor *tensor, ir::TypeInfo &typeInfo);
+ void loadSparsity(const Tensor *tensor, ir::TypeInfo &typeInfo);
void loadOperationIO(const Operator *op, ir::OperandIndexSequence &inputs,
ir::OperandIndexSequence &outputs);
// Create operations from Operator
std::unique_ptr<Verifier> _verifier;
// Boolean flag to use MMAPED_DATA
bool _use_mmaped_data = false;
+
+ std::unordered_map<uint32_t /* Buffer Index in circle file */, std::shared_ptr<ir::Data>>
+ _buf_to_data;
};
template <typename LoaderDomain>
-void BaseLoader<LoaderDomain>::BaseLoader::loadFromFile(const char *file_path)
+void BaseLoader<LoaderDomain>::BaseLoader::loadFromFile(const std::string &file_path)
{
- _fd = open(file_path, O_RDONLY);
+ _fd = open(file_path.c_str(), O_RDONLY);
if (_fd < 0)
{
- throw std::runtime_error("Failed to open file " + std::string(file_path));
+ throw std::runtime_error("Failed to open file " + file_path);
}
struct stat file_stat;
if (fstat(_fd, &file_stat) != 0)
{
- throw std::runtime_error("Fstat failed or file " + std::string(file_path) +
- " is not a regular file");
+ throw std::runtime_error("Fstat failed or file " + file_path + " is not a regular file");
}
int size = file_stat.st_size;
// If app wants to change the input shape, call nnfw_apply_input_tensorinfo() can
// be used.
- // Type
- ir::DataType data_type = tensorTypeToDataType(tensor->type());
- // Quantization
- auto q_params = tensor->quantization();
- float scale = 0.0;
- long zero_point = 0;
- if (q_params != nullptr)
- {
- if (q_params->scale())
- {
- if (q_params->scale()->size() != 1)
- {
- throw std::runtime_error("Only 1 scale for a tensor is supported.");
- }
- scale = q_params->scale()->Get(0);
- }
-
- if (q_params->zero_point())
- {
- if (q_params->zero_point()->size() != 1)
- {
- throw std::runtime_error("Only 1 zero_point value for a tensor is supported.");
- }
- zero_point = q_params->zero_point()->Get(0);
- // zero_point is long while TypeInfo.zero_point is defined as int32_t.
- assert(zero_point >= std::numeric_limits<int32_t>::min());
- assert(zero_point <= std::numeric_limits<int32_t>::max());
- }
- auto details = q_params->details_as_CustomQuantization();
- if (details != nullptr)
- throw std::runtime_error("Custom Quantization is not supported");
- }
- // Create TypeInfo
- ir::TypeInfo type_info(data_type, scale, zero_point);
- // Sparsity
- loadSparsity(tensor, shape, type_info);
+ // TypeInfo
+ ir::TypeInfo type_info(tensorTypeToDataType(tensor->type()));
+ loadQuantization(tensor, type_info);
+ loadSparsity(tensor, type_info);
// Create operand
const auto operand_index = subg.addOperand(shape, type_info);
if (data != nullptr)
{
using std::ptrdiff_t;
- std::unique_ptr<ir::Data> data_obj;
+ std::shared_ptr<ir::Data> data_obj;
+
if (_fd == -1) // Model is from memory
{
- data_obj = std::make_unique<ir::ExternalData>(data->data(), data->size());
+ data_obj = std::make_shared<ir::ExternalData>(data->data(), data->size());
}
else // Model is loaded(mmap'd) from a file
{
ptrdiff_t aligned_offset_start = (unaligned_offset_start / _pagesize) * _pagesize;
size_t mmap_size = offset_end - aligned_offset_start;
- if (_use_mmaped_data)
+ uint32_t buf_idx = tensor->buffer();
+ auto buffer_found = _buf_to_data.find(buf_idx);
+
+ if (buffer_found != _buf_to_data.end())
+ {
+      // Another tensor points to this buffer, and its matching Data (either CachedData or
+      // MMapedData) was already created, so reuse that Data.
+ data_obj = buffer_found->second;
+ }
+ else if (_use_mmaped_data)
{
- data_obj = std::make_unique<ir::MMapedData>(_fd, aligned_offset_start, mmap_size,
+ data_obj = std::make_shared<ir::MMapedData>(_fd, aligned_offset_start, mmap_size,
unaligned_offset_start, data_size);
+ _buf_to_data[buf_idx] = data_obj;
}
else
{
size_t offset = unaligned_offset_start - aligned_offset_start;
uint8_t *mmap_base = static_cast<uint8_t *>(
mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, _fd, aligned_offset_start));
- data_obj = std::make_unique<ir::CachedData>(mmap_base + offset, data_size);
+
+ data_obj = std::make_shared<ir::CachedData>(mmap_base + offset, data_size);
+ _buf_to_data[buf_idx] = data_obj;
+
munmap(mmap_base, mmap_size);
}
}
}
template <typename LoaderDomain>
-void BaseLoader<LoaderDomain>::loadSparsity(const Tensor *tensor, const ir::Shape &shape,
- ir::TypeInfo &typeInfo)
+void BaseLoader<LoaderDomain>::loadQuantization(const Tensor *tensor, ir::TypeInfo &typeInfo)
+{
+ auto q_params = tensor->quantization();
+ if (q_params == nullptr || q_params->scale() == nullptr || q_params->scale()->size() == 0)
+ {
+ typeInfo.quantization(0., 0);
+ return;
+ }
+ if (q_params->zero_point() == nullptr)
+ {
+ throw std::runtime_error("Quantization params: scale is not null, but zero_point is null.");
+ }
+ const size_t num_scales = q_params->scale()->size();
+ if (num_scales != q_params->zero_point()->size())
+ {
+ throw std::runtime_error("Quantization params: scale size != zero_point size");
+ }
+ std::vector<float> scales;
+ std::vector<int32_t> zero_points;
+ scales.resize(num_scales);
+ zero_points.resize(num_scales);
+ for (size_t i = 0; i < num_scales; ++i)
+ {
+ scales[i] = q_params->scale()->Get(i);
+    // zero_point is defined as long (i64) in the schema, while TypeInfo's zero_point is int32_t.
+    // int64_t is used instead of long because long is 4 bytes on most 32-bit architectures.
+ int64_t zero_point = q_params->zero_point()->Get(i);
+ if (zero_point < std::numeric_limits<int32_t>::min() ||
+ zero_point > std::numeric_limits<int32_t>::max())
+ throw std::runtime_error("Zero_point is out of int32 range.");
+ zero_points[i] = static_cast<int32_t>(zero_point);
+ }
+ auto details = q_params->details_as_CustomQuantization();
+ if (details != nullptr)
+ throw std::runtime_error("Custom Quantization is not supported");
+ typeInfo.quantization(std::move(scales), std::move(zero_points));
+}
+
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadSparsity(const Tensor *tensor, ir::TypeInfo &typeInfo)
{
auto src_sparsity = tensor->sparsity();
if (src_sparsity != nullptr)
}
}
// load metadata
- const int dim_metadata_size = src_sparsity->dim_metadata()->size();
- auto dense_rank = shape.rank();
+ const auto dim_metadata_size = src_sparsity->dim_metadata()->size();
+ const auto dense_rank = tensor->shape() ? tensor->shape()->size() : 0;
if (dense_rank + block_rank != dim_metadata_size)
throw std::runtime_error("sparsity dim_metadata length is wrong.");
bool random_sparsity = dim_metadata_size == 2 && block_rank == 0;
set(CIRCLE_LOADER_SOURCES src/circle_loader.cc)
-add_library(circle_loader SHARED ${CIRCLE_LOADER_SOURCES})
+add_library(circle_loader STATIC ${CIRCLE_LOADER_SOURCES})
+set_target_properties(circle_loader PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(circle_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(circle_loader PRIVATE onert_core)
target_link_libraries(circle_loader PRIVATE base_loader nnfw_common nnfw_coverage)
target_link_libraries(circle_loader PRIVATE circle_schema)
-
-if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
- add_custom_command(TARGET circle_loader POST_BUILD
- COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:circle_loader>)
-endif()
-
-install(TARGETS circle_loader DESTINATION lib)
{
namespace circle_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const char *filename);
+std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size);
} // namespace circle_loader
} // namespace onert
subg->setLayout(convertDataFormat(circle_subg->data_format()));
- subg->finishBuilding();
+ subg->verify();
return subg;
}
} // namespace
-std::unique_ptr<ir::Subgraphs> loadModel(const char *filename)
+std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
{
auto subgraphs = std::make_unique<ir::Subgraphs>();
CircleLoader loader(subgraphs);
// ANeuralNetworksModel
//
ANeuralNetworksModel::ANeuralNetworksModel() noexcept
- : _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{false}
+ : _finished_building{false}, _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{
+ false}
{
_graph = std::make_shared<onert::ir::Graph>();
}
{
fillOptionalOperand();
- _graph->finishBuilding();
-
+ _graph->verify();
_operand_usages.clear();
+ _finished_building = true;
}
catch (const std::exception &e)
{
return true;
}
-bool ANeuralNetworksModel::isFinished() noexcept { return !_graph->isBuildingPhase(); }
+bool ANeuralNetworksModel::isFinished() noexcept { return _finished_building; }
bool ANeuralNetworksModel::isExistOperand(uint32_t index) noexcept
{
private:
std::shared_ptr<onert::ir::Graph> _graph;
+ bool _finished_building;
std::unordered_set<onert::ir::OperandIndex> _optional_operands;
std::vector<OperandUsage> _operand_usages;
bool _allowFloat32toFloat16;
set(TFLITE_LOADER_SOURCES src/tflite_loader.cc)
-add_library(tflite_loader SHARED ${TFLITE_LOADER_SOURCES})
+add_library(tflite_loader STATIC ${TFLITE_LOADER_SOURCES})
+set_target_properties(tflite_loader PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(tflite_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(tflite_loader PRIVATE onert_core)
target_link_libraries(tflite_loader PRIVATE base_loader nnfw_common nnfw_coverage)
-
-if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
- add_custom_command(TARGET tflite_loader POST_BUILD
- COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:tflite_loader>)
-endif()
-
-install(TARGETS tflite_loader DESTINATION lib)
namespace tflite_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const char *filename);
+std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
} // namespace tflite_loader
} // namespace onert
loadOperation(op, *subg);
}
- subg->finishBuilding();
+ subg->verify();
return subg;
}
} // namespace
-std::unique_ptr<ir::Subgraphs> loadModel(const char *filename)
+std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
{
auto subgraphs = std::make_unique<ir::Subgraphs>();
TFLiteLoader loader(subgraphs);
+++ /dev/null
-../../../.clang-format.8
\ No newline at end of file
+++ /dev/null
-../../../.clang-format.8
\ No newline at end of file
bool supportFP16() override { return false; }
};
+class MockBackendContext : public BackendContext
+{
+public:
+ using BackendContext::BackendContext;
+ ITensorRegistry *genTensors() override { return nullptr; }
+ FunctionMap genKernels() override { return {}; }
+};
+
struct MockBackendCPU : public Backend
{
std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
- std::unique_ptr<BackendContext>
- newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
{
- return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
}
};
struct MockBackendGPU : public Backend
{
std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
- std::unique_ptr<BackendContext>
- newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
{
- return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
}
};
struct MockBackendNPU : public Backend
{
std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
- std::unique_ptr<BackendContext>
- newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
{
- return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
}
};
BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
- graph->finishBuilding();
+ graph->verify();
return graph;
}
BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
- graph->finishBuilding();
+ graph->verify();
return graph;
}
setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
}
- backend::BackendContexts buildBackendContexts(const Graph &graph)
- {
- backend::BackendContexts contexts;
- for (auto backend : _mock_backends)
- {
- contexts.emplace(backend, backend->newContext(graph, nullptr, false));
- }
- return contexts;
- }
-
const MockBackendCPU *_cpu_backend{nullptr};
const MockBackendGPU *_gpu_backend{nullptr};
const MockBackendNPU *_npu_backend{nullptr};
et.storeOperationsExecTime();
// Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto scheduler =
+ compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
const auto br = scheduler.schedule(*graph);
ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
// Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto scheduler =
+ compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
const auto br = scheduler.schedule(*graph);
ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
et.storeOperationsExecTime();
// Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto scheduler =
+ compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
const auto br = scheduler.schedule(*graph);
std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
et.storeOperationsExecTime();
// Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto scheduler =
+ compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
const auto br = scheduler.schedule(*graph);
ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
et.storeOperationsExecTime();
// Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto scheduler =
+ compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
const auto br = scheduler.schedule(*graph);
ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
et.storeOperationsExecTime();
// Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto scheduler =
+ compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
const auto br = scheduler.schedule(*graph);
ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
br->getBackend(mul1_op_idx)->config()->id());
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include "ir/Graph.h"
+#include "compiler/pass/UnusedOperandEliminationPass.h"
+
+using namespace onert::ir;
+using namespace onert::compiler::pass;
+
+TEST(UnusedOperandEliminationPass, Simple)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ auto unused = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+
+ UnusedOperandEliminationPass{graph}.run();
+
+ ASSERT_TRUE(graph.operands().exist(in));
+ ASSERT_TRUE(graph.operands().exist(out));
+ ASSERT_FALSE(graph.operands().exist(unused));
+}
graph->addInput(operand_lhs);
graph->addInput(operand_rhs1);
graph->addOutput(operand_result2);
- graph->finishBuilding();
+ graph->verify();
// Compile
auto subgs = std::make_shared<onert::ir::Subgraphs>();
{
return std::make_shared<MockConfig>();
}
- std::unique_ptr<BackendContext> newContext(const ir::Graph &,
- const std::shared_ptr<custom::IKernelBuilder> &kb,
- bool) const override
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&) const override
{
return nullptr;
}
_graph->getInputs().append(operand_rhs);
_graph->getOutputs().append(operand_result);
- _graph->finishBuilding();
+ _graph->verify();
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, _graph);
_graph->getInputs().append(operand_rhs1);
_graph->getOutputs().append(operand_result2);
- _graph->finishBuilding();
+ _graph->verify();
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, _graph);
_graph->getInputs().append(operand_rhs);
_graph->getOutputs().append(operand_result);
- _graph->finishBuilding();
+ _graph->verify();
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, _graph);
TEST_F(InterpExecutorTest, create_empty)
{
Graph graph;
- graph.finishBuilding();
+ graph.verify();
auto executor = std::make_unique<InterpExecutor>(graph);
ASSERT_NE(executor, nullptr);
}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Graph.h"
+#include "ir/operation/BinaryArithmetic.h"
+#include "ir/verifier/Verifier.h"
+
+TEST(Graph, neg_inputs_and_outputs)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::OperandIndex index0{0u};
+ onert::ir::OperandIndex index1{1u};
+
+ graph.addInput({index0});
+ graph.addInput({index1});
+
+ onert::ir::OperandIndex index10{10u};
+ onert::ir::OperandIndex index11{11u};
+ onert::ir::OperandIndex index12{12u};
+
+ graph.addOutput({index10});
+ graph.addOutput({index11});
+ graph.addOutput({index12});
+
+ ASSERT_EQ(graph.getInputs().size(), 2);
+ ASSERT_EQ(graph.getOutputs().size(), 3);
+
+ onert::ir::IOIndex io_index0{0};
+ onert::ir::IOIndex io_index1{1};
+ onert::ir::IOIndex io_index2{2};
+
+ ASSERT_EQ(graph.getInputs().at(io_index0), 0);
+ ASSERT_EQ(graph.getInputs().at(io_index1), 1);
+
+ ASSERT_EQ(graph.getOutputs().at(io_index0), 10);
+ ASSERT_EQ(graph.getOutputs().at(io_index1), 11);
+ ASSERT_EQ(graph.getOutputs().at(io_index2), 12);
+
+ EXPECT_THROW(graph.getOutputs().at(onert::ir::IOIndex{3}), std::out_of_range);
+}
+
+using namespace onert::ir;
+
+OperationIndex addAddOperation(Graph &graph, const OperandIndexSequence inputs,
+ const OperandIndexSequence outputs)
+{
+ // Add "ADD" operation
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param.activation = Activation::NONE;
+ return graph.addOperation(std::make_unique<operation::BinaryArithmetic>(inputs, outputs, param));
+}
+
+TEST(Graph, OneOpGraphSimpleValid)
+{
+ // Simple Graph with just one Add operation
+
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto lhs = graph.addOperand(shape, type);
+ auto rhs = graph.addOperand(shape, type);
+ auto res = graph.addOperand(shape, type);
+
+ addAddOperation(graph, {lhs, rhs}, {res});
+
+ // Set model inputs/outputs
+ graph.addInput(lhs);
+ graph.addInput(rhs);
+ graph.addOutput(res);
+
+ graph.verify();
+
+ SUCCEED();
+}
+
+TEST(Graph, neg_InvalidGraph_BadInput)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+  graph.addInput(OperandIndex{89}); // Non-existing operand!
+
+ EXPECT_ANY_THROW(graph.verify());
+}
+
+TEST(Graph, neg_InvalidGraph_BadOutput)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+  graph.addOutput(OperandIndex{12}); // Non-existing operand!
+
+ EXPECT_ANY_THROW(graph.verify());
+}
+
+TEST(Graph, neg_InvalidAddOperation_BadInputIndex)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto lhs = graph.addOperand(shape, type);
+ auto rhs = graph.addOperand(shape, type);
+ auto res = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(lhs);
+ graph.addInput(rhs);
+ graph.addOutput(res);
+
+ ASSERT_FALSE(addAddOperation(graph, {lhs, OperandIndex{99}}, {res}).valid());
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/LayoutSet.h"
+
+using onert::ir::Layout;
+using onert::ir::LayoutSet;
+
+TEST(ir_LayoutSet, neg_add_remove)
+{
+ LayoutSet set{Layout::NCHW};
+ set.remove(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 2);
+ set.remove(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.remove(Layout::NCHW);
+ ASSERT_EQ(set.size(), 0);
+ set.remove(Layout::NCHW);
+ ASSERT_EQ(set.size(), 0);
+}
+
+TEST(ir_LayoutSet, neg_add_twice)
+{
+ LayoutSet set;
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+}
+
+TEST(ir_LayoutSet, set_operators)
+{
+ LayoutSet set1{Layout::NCHW};
+ LayoutSet set2{Layout::NHWC};
+ LayoutSet set3 = set1 | set2;
+
+ ASSERT_EQ(set3.size(), 2);
+
+ ASSERT_EQ((set3 - set1).size(), 1);
+ ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true);
+ ASSERT_EQ((set3 - set2).size(), 1);
+ ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true);
+ ASSERT_EQ((set3 - set3).size(), 0);
+
+ ASSERT_EQ((set3 & set1).size(), 1);
+ ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true);
+ ASSERT_EQ((set3 & set2).size(), 1);
+ ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true);
+ ASSERT_EQ((set1 & set2).size(), 0);
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TEST_GRAPH_MOCK_NODE_H__
+#define __ONERT_TEST_GRAPH_MOCK_NODE_H__
+
+#include "ir/Operation.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert_test
+{
+namespace ir
+{
+
+class SimpleMock : public onert::ir::Operation
+{
+public:
+ SimpleMock(const onert::ir::OperandIndexSequence &inputs,
+ const onert::ir::OperandIndexSequence &outputs)
+ : Operation{onert::ir::OperandConstraint::createAny()}
+ {
+ setInputs(inputs);
+ setOutputs(outputs);
+ }
+
+public:
+ void accept(onert::ir::OperationVisitor &) const override {}
+ onert::ir::OpCode opcode() const final { return onert::ir::OpCode::Invalid; }
+};
+
+} // namespace ir
+} // namespace onert_test
+
+#endif // __ONERT_TEST_GRAPH_MOCK_NODE_H__
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/OperandIndexSequence.h"
+
+using onert::ir::OperandIndex;
+using onert::ir::OperandIndexSequence;
+
+TEST(ir_OperandIndexSequence, neg_append)
+{
+ OperandIndexSequence iset{0, 2, 4, 8};
+
+ ASSERT_EQ(iset.size(), 4);
+
+ iset.append(OperandIndex{10});
+
+ ASSERT_EQ(iset.size(), 5);
+
+ onert::ir::IOIndex index1{1};
+ onert::ir::IOIndex index2{4};
+
+ ASSERT_EQ(iset.at(index1), 2);
+ ASSERT_EQ(iset.at(index2), 10);
+
+ ASSERT_TRUE(iset.contains(OperandIndex{2}));
+ ASSERT_TRUE(iset.contains(OperandIndex{10}));
+ ASSERT_FALSE(iset.contains(OperandIndex{11}));
+}
+
+TEST(graph_OperandIndexSequence, neg_replace)
+{
+ OperandIndexSequence iset{0, 1, 2, 3};
+
+ iset.replace(OperandIndex{1}, OperandIndex{9});
+ ASSERT_FALSE(iset.contains(OperandIndex{1}));
+ ASSERT_TRUE(iset.contains(OperandIndex{9}));
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Operands.h"
+
+TEST(ir_Operands, neg_set_test)
+{
+ onert::ir::Operands set;
+
+ onert::ir::Shape shape0{1, 2, 3};
+
+ onert::ir::Shape shape1(4);
+ shape1.dim(0) = 10;
+ shape1.dim(1) = 20;
+ shape1.dim(2) = 30;
+ shape1.dim(3) = 40;
+
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ set.emplace(shape0, type);
+ set.emplace(shape1, type);
+
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true);
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true);
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false);
+
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1);
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2);
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3);
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "MockNode.h"
+#include "ir/Operations.h"
+
+using onert::ir::Operation;
+using onert::ir::OperationIndex;
+using onert::ir::Operations;
+
+TEST(ir_Operations, basic)
+{
+ Operations ops;
+ ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
+ OperationIndex idx{0u};
+ ASSERT_EQ(ops.at(idx).getInputs().size(), 4);
+ ASSERT_EQ(ops.at(idx).getOutputs().size(), 3);
+}
+
+TEST(ir_Operations, neg_at)
+{
+ Operations ops;
+ ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
+ OperationIndex idx{99u};
+ EXPECT_THROW(ops.at(idx), std::out_of_range);
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexSequence.h"
+#include "ir/operation/Conv2D.h"
+#include "ir/operation/Concat.h"
+
+#include <memory>
+
+#include <stdexcept>
+
+using Index = onert::ir::IOIndex;
+using IndexSet = onert::ir::OperandIndexSequence;
+
+TEST(ir_Operation_setIO, operation_setIO_conv)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ // Add Conv
+ using Graph = onert::ir::operation::Conv2D;
+
+ auto input_operand = graph.addOperand(shape, type);
+ auto kernel_operand = graph.addOperand(shape, type);
+ auto bias_operand = graph.addOperand(shape, type);
+ IndexSet inputs{input_operand, kernel_operand, bias_operand};
+
+ Graph::Param conv_params;
+ conv_params.padding.type = onert::ir::PaddingType::SAME;
+ conv_params.stride.horizontal = 1;
+ conv_params.stride.vertical = 1;
+ conv_params.activation = onert::ir::Activation::NONE;
+
+ auto output_operand = graph.addOperand(shape, type).value();
+ IndexSet outputs{output_operand};
+
+ auto conv = std::make_unique<Graph>(inputs, outputs, conv_params);
+
+ ASSERT_NE(conv, nullptr);
+ ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ conv->setInputs({8, 9, 10});
+ ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8);
+}
+
+TEST(ir_Operation_setIO, neg_operation_setIO_concat)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ using Graph = onert::ir::operation::Concat;
+
+ // Add Concat
+ IndexSet inputs;
+ for (int i = 0; i < 6; ++i)
+ {
+ inputs.append(graph.addOperand(shape, type));
+ }
+
+ Graph::Param concat_params{0};
+
+ auto output_operand = graph.addOperand(shape, type).value();
+ IndexSet outputs{output_operand};
+
+ auto concat = std::make_unique<Graph>(inputs, outputs, concat_params);
+
+ ASSERT_NE(concat, nullptr);
+ ASSERT_EQ(concat->getInputs().size(), 6);
+ ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+
+ concat->setInputs({80, 6, 9, 11});
+ ASSERT_EQ(concat->getInputs().size(), 4);
+ ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80);
+ ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9);
+ ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <ir/Shape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeTest, basic_test)
+{
+ {
+ onert::ir::Shape shape(3);
+
+ shape.dim(0) = 1;
+ shape.dim(1) = 2;
+ shape.dim(2) = 3;
+
+ ASSERT_EQ(shape.rank(), 3);
+ ASSERT_EQ(shape.num_elements(), 6);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), false);
+ }
+ {
+ onert::ir::Shape shape; // scalar or rank is unspecified
+
+ ASSERT_EQ(shape.rank(), 0);
+ ASSERT_EQ(shape.num_elements(), 1);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), true);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), false);
+ }
+}
+
+TEST(ShapeTest, neg_basic_test)
+{
+ {
+ onert::ir::Shape shape(2);
+
+ shape.dim(0) = 1;
+ shape.dim(1) = onert::ir::Shape::UNSPECIFIED_DIM;
+
+ ASSERT_EQ(shape.rank(), 2);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), true);
+ EXPECT_ANY_THROW(shape.num_elements());
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Graph.h"
+#include "ir/verifier/Verifier.h"
+#include <memory>
+#include "MockNode.h"
+
+#include <typeindex>
+
+namespace
+{
+
+using IndexSet = onert::ir::OperandIndexSequence;
+using Mock = onert_test::ir::SimpleMock;
+
+} // namespace
+
+TEST(ir_Operand, neg_usedef)
+{
+ onert::ir::Graph graph;
+ onert::ir::verifier::DAGChecker verifier;
+
+ onert::ir::Shape shape(3);
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ // Model Input/Output
+ auto input_operand = graph.addOperand(shape, type);
+ auto output_operand = graph.addOperand(shape, type);
+
+ graph.addInput(input_operand);
+ graph.addOutput(output_operand);
+
+ // MockNode1
+ auto operand_index1 = graph.addOperand(shape, type);
+ auto mocknode_index1 =
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
+
+ // MockNode2
+ auto operand_index2 = graph.addOperand(shape, type);
+ auto mocknode_index2 =
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
+
+ // MockNode3(two input)
+ auto multiinput_index = graph.addOperation(
+ std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
+
+ graph.verify();
+
+ ASSERT_TRUE(verifier.verify(graph));
+
+ // Check def
+ ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1);
+ ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2);
+ ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index);
+
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2);
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index);
+
+ // Check use
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true);
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false);
+ ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true);
+ ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true);
+
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2);
+ ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1);
+ ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0);
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Operation.h"
+#include "ir/Graph.h"
+#include "ir/verifier/Verifier.h"
+#include <memory>
+#include "ir/Operand.h"
+#include "MockNode.h"
+
+using IndexSet = onert::ir::OperandIndexSequence;
+using Mock = onert_test::ir::SimpleMock;
+
+TEST(Verifier, dag_checker)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ graph.addOperation(std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}));
+
+ onert::ir::verifier::DAGChecker verifier;
+
+ ASSERT_TRUE(verifier.verify(graph));
+}
+
+TEST(Verifier, neg_edge_consistency_checker_1)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
+ auto op_ind = graph.addOperation(std::move(mock_op));
+
+ graph.operands().at(operand1).removeUse(op_ind); // Manipulate the operand alone
+
+ onert::ir::verifier::EdgeChecker verifier;
+ ASSERT_FALSE(verifier.verify(graph));
+}
+
+TEST(Verifier, neg_edge_consistency_checker_2)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
+ auto mock_op_ptr = mock_op.get();
+ auto op_ind = graph.addOperation(std::move(mock_op));
+
+ mock_op_ptr->setInputs({operand2}); // Manipulate the operation alone
+
+ onert::ir::verifier::EdgeChecker verifier;
+ ASSERT_FALSE(verifier.verify(graph));
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "util/Index.h"
+
+using Index = ::onert::util::Index<uint32_t, struct TestTag>;
+
+TEST(Index, neg_index_test)
+{
+ Index idx1{1u};
+ Index idx2{2u};
+ Index idx3{idx1};
+
+ ASSERT_EQ(idx1, 1);
+ ASSERT_EQ(idx1, 1u);
+ ASSERT_EQ(idx1.value(), 1u);
+ ASSERT_NE(idx1, idx2);
+ ASSERT_EQ(idx1, idx3);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "util/ObjectManager.h"
+#include "util/Index.h"
+
+using namespace onert;
+
+struct TestTag;
+using Index = typename util::Index<uint32_t, TestTag>;
+
+TEST(ObjectManager, emplace)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index = man.emplace(100);
+ ASSERT_EQ(man.at(index), 100);
+}
+
+TEST(ObjectManager, neg_remove_1)
+{
+ util::ObjectManager<Index, int> man;
+
+ Index index = man.emplace(100);
+ ASSERT_TRUE(man.exist(index));
+ ASSERT_EQ(man.at(index), 100);
+
+ man.remove(index);
+ ASSERT_FALSE(man.exist(index));
+}
+
+TEST(ObjectManager, neg_remove_2)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ ASSERT_TRUE(man.exist(index0));
+ ASSERT_EQ(man.at(index0), 100);
+ ASSERT_TRUE(man.exist(index1));
+ ASSERT_EQ(man.at(index1), 200);
+
+ man.remove(index0);
+ ASSERT_FALSE(man.exist(index0));
+ ASSERT_TRUE(man.exist(index1));
+ ASSERT_EQ(man.at(index1), 200);
+}
+
+TEST(ObjectManager, push)
+{
+ util::ObjectManager<Index, int> man;
+
+  // Push without specifying an index
+ auto index = man.push(std::make_unique<int>(100));
+ ASSERT_EQ(man.at(index), 100);
+
+  // Push with an explicitly specified index
+ auto index2 = man.push(std::make_unique<int>(200), Index{33});
+ ASSERT_EQ(index2.value(), 33);
+ ASSERT_EQ(man.at(index2), 200);
+
+ auto index3 = man.push(std::make_unique<int>(300));
+  // NOTE The auto-generated index is always (the biggest index in the ObjectManager + 1)
+ ASSERT_EQ(index3.value(), 34);
+ ASSERT_EQ(man.at(index3), 300);
+
+ auto index4 = man.push(std::make_unique<int>(400), Index{22});
+ ASSERT_EQ(index4.value(), 22);
+ ASSERT_EQ(man.at(index4), 400);
+
+ auto index5 = man.push(std::make_unique<int>(500));
+  // NOTE The auto-generated index is always (the biggest index in the ObjectManager + 1)
+ ASSERT_EQ(index5.value(), 35);
+ ASSERT_EQ(man.at(index5), 500);
+}
+
+TEST(ObjectManager, neg_push)
+{
+ util::ObjectManager<Index, int> man;
+
+  // Push with an explicitly specified index
+ auto index = man.push(std::make_unique<int>(100), Index{55});
+ ASSERT_EQ(index.value(), 55);
+ ASSERT_EQ(man.at(index), 100);
+
+  // Pushing with the same index again must fail
+ auto index2 = man.push(std::make_unique<int>(200), Index{55});
+ ASSERT_FALSE(index2.valid());
+}
+
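+// NOTE The maximum uint32_t value is presumably reserved as the "undefined index" sentinel,
+// so pushing with it must fail and (kMaxUInt32 - 1) is the largest usable index.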
+static const uint32_t kMaxUInt32 = std::numeric_limits<uint32_t>::max();
+
+TEST(ObjectManager, neg_push_undefined_index)
+{
+ util::ObjectManager<Index, int> man;
+
+  // Try inserting with an invalid (undefined) index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32});
+ ASSERT_FALSE(index.valid());
+ ASSERT_EQ(man.size(), 0);
+}
+
+TEST(ObjectManager, neg_push_max_index)
+{
+ util::ObjectManager<Index, int> man;
+
+  // Insert an object with the maximum valid index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
+ ASSERT_EQ(index.value(), kMaxUInt32 - 1);
+ ASSERT_EQ(man.at(index), 100);
+ ASSERT_EQ(man.size(), 1);
+
+  // The final index has been reached, so the next push/emplace must fail
+ auto index2 = man.push(std::make_unique<int>(200));
+ ASSERT_EQ(man.size(), 1);
+ ASSERT_FALSE(index2.valid());
+}
+
+TEST(ObjectManager, neg_emplace_max_index)
+{
+ util::ObjectManager<Index, int> man;
+
+  // Insert an object with the maximum valid index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
+ ASSERT_EQ(index.value(), kMaxUInt32 - 1);
+ ASSERT_EQ(man.at(index), 100);
+ ASSERT_EQ(man.size(), 1);
+
+  // The final index has been reached, so the next push/emplace must fail
+ auto index3 = man.emplace(200);
+ ASSERT_EQ(man.size(), 1);
+ ASSERT_FALSE(index3.valid());
+}
+
+TEST(ObjectManager, const_iterate)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ auto index2 = man.emplace(300);
+
+ int sum = 0;
+ man.iterate([&](const Index &index, const int &val) { sum += val; });
+ ASSERT_EQ(sum, 600);
+}
+
+TEST(ObjectManager, non_const_iterate)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ auto index2 = man.emplace(300);
+
+ man.iterate([&](const Index &index, int &val) { val += 1; });
+ ASSERT_EQ(man.at(index0), 101);
+ ASSERT_EQ(man.at(index1), 201);
+ ASSERT_EQ(man.at(index2), 301);
+}
+
+TEST(ObjectManager, set)
+{
+ util::ObjectManager<Index, int> man;
+ auto index = man.set(Index{1}, std::make_unique<int>(100)); // Insert
+ ASSERT_EQ(index, Index{1});
+ auto index2 = man.set(index, std::make_unique<int>(200)); // Overwrite
+ ASSERT_EQ(index2, index);
+ ASSERT_EQ(man.at(index2), 200);
+}
+
+TEST(ObjectManager, neg_set)
+{
+ auto v = std::make_unique<int>(100);
+ util::ObjectManager<Index, int> man;
+  auto index = man.set(Index{}, std::move(v)); // Try to set with an invalid index
+ ASSERT_EQ(index, Index{});
+ ASSERT_FALSE(index.valid());
+  ASSERT_NE(v, nullptr); // v must be kept on failure
+}
+
+TEST(ObjectManager, getRawPtr)
+{
+ auto v = std::make_unique<int>(100);
+ auto v_ptr = v.get();
+ util::ObjectManager<Index, int> man;
+ auto index = man.push(std::move(v));
+ ASSERT_EQ(v_ptr, man.getRawPtr(index));
+}
+
+TEST(ObjectManager, neg_getRawPtr)
+{
+ util::ObjectManager<Index, int> man;
+ auto ptr = man.getRawPtr(Index{1});
+ ASSERT_EQ(ptr, nullptr);
+}
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Layout.h"
+#include "util/ShapeInference.h"
+
+using namespace onert::ir;
+
+TEST(ShapeInference, Elementwise)
+{
+ Shape lhs_shape{1, 299, 299, 3};
+ Shape rhs_shape{3};
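+  // The rank-1 rhs broadcasts against the last dimension of lhs, so the result keeps lhs_shape.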
+ auto infered_out_shape = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.dim(0), 1);
+ ASSERT_EQ(infered_out_shape.dim(1), 299);
+ ASSERT_EQ(infered_out_shape.dim(2), 299);
+ ASSERT_EQ(infered_out_shape.dim(3), 3);
+}
+
+TEST(ShapeInference, neg_Elementwise)
+{
+ Shape lhs_shape{1, 299, 299, 3};
+ Shape rhs_shape{5, 3};
+ ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error);
+}
+
+TEST(ShapeInference, Pool2DNodeSame)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{3, 7};
+ Padding padding{PaddingType::SAME};
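+  // Expected spatial dims follow the usual SAME-padding rule, out = ceil(in / stride):
+  // H = ceil(6 / 3) = 2, W = ceil(12 / 7) = 2.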
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeValid)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{3, 7};
+ Padding padding{PaddingType::VALID};
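+  // Expected spatial dims follow the usual VALID-padding rule, out = (in - kernel) / stride + 1:
+  // H = (6 - 3) / 3 + 1 = 2, W = (12 - 6) / 7 + 1 = 1.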
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeExplicit)
+{
+ Shape in_shape{10, 3, 5, 20};
+
+ Stride stride{3, 7};
+ Padding padding{4, 3, 2, 1};
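+  // With explicit padding, out = (in + pads - kernel) / stride + 1; assuming the pads sum to
+  // 3 for H and 7 for W: H = (3 + 3 - 3) / 3 + 1 = 2, W = (5 + 7 - 6) / 7 + 1 = 1.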
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, neg_Pool2DNode_InvalidStride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{0, 7};
+ Padding padding{PaddingType::SAME};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, Conv2D)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{30, 3, 6, 20};
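+  // ker_shape is presumably laid out as {out_channels, kernel_h, kernel_w, in_channels},
+  // i.e. a 3x6 kernel producing 30 output channels.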
+
+ operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
+ auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+ param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
+ Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+ param =
+ operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+}
+
+TEST(ShapeInference, neg_Conv2D_InvalidStride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{30, 3, 6, 20};
+
+ operation::Conv2D::Param param{Stride{0, 0}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
+ ASSERT_THROW(onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, DepthwiseConv2D)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{1, 3, 6, 60};
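+  // ker_shape is presumably {1, kernel_h, kernel_w, in_channels * multiplier};
+  // with 20 input channels and multiplier 3 the output has 60 channels.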
+
+ operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ auto infered_out_shape =
+ onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+ param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+ param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE,
+ Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+}
+
+TEST(ShapeInference, neg_DepthwiseConv2D_InvalidSride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{1, 3, 6, 60};
+
+ operation::DepthwiseConv2D::Param param{Stride{3, 0}, Padding{PaddingType::VALID}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ ASSERT_THROW(onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, Concat)
+{
+ {
+ Shape in1{10, 20, 30, 3, 50};
+ Shape in2{10, 20, 30, 2, 50};
+ Shape in3{10, 20, 30, 2, 50};
+
+ operation::Concat::Param param{3};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2, in3}, param);
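+    // Concatenation along axis 3: 3 + 2 + 2 = 7; all other dims must match.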
+
+ ASSERT_EQ(infered_out_shape.rank(), 5);
+ ASSERT_EQ(infered_out_shape.dim(0), 10);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 30);
+ ASSERT_EQ(infered_out_shape.dim(3), 7);
+ ASSERT_EQ(infered_out_shape.dim(4), 50);
+ }
+ {
+    // case 1: axis < 0 (here -1 resolves to axis 2)
+ Shape in1{10, 20, 2};
+ Shape in2{10, 20, 3};
+
+ operation::Concat::Param param{-1};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 3);
+ ASSERT_EQ(infered_out_shape.dim(0), 10);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 5);
+ }
+ {
+    // case 2: axis < 0 (here -3 resolves to axis 0)
+ Shape in1{2, 20, 2};
+ Shape in2{3, 20, 2};
+
+ operation::Concat::Param param{-3};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 3);
+ ASSERT_EQ(infered_out_shape.dim(0), 5);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 2);
+ }
+}
+
+TEST(ShapeInference, neg_Concat)
+{
+ {
+ operation::Concat::Param param{2};
+ Shape in1{10, 1, 3};
+    Shape in2{10, 2, 4}; // dim[1] should be 1 but is 2
+
+ EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
+ }
+ { // wrong rank
+ operation::Concat::Param param{2};
+ Shape in1{10, 2, 3, 4};
+ Shape in2{10, 2, 4}; // rank should be 4
+
+ EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
+ }
+}
+
+TEST(ShapeInference, ExpandDims)
+{
+ Shape in_shape{30, 40};
+
+ auto check = [&](int32_t axis, Shape &expected) {
+ auto actual = onert::shape_inference::inferExpandDimsShape(in_shape, axis);
+
+ ASSERT_EQ(actual.rank(), 3);
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
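+  // ExpandDims inserts a dimension of size 1 at 'axis'; valid axes are [-(rank + 1), rank],
+  // i.e. [-3, 2] for this rank-2 input.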
+ { // boundary
+ int32_t axis = 0;
+ Shape expected{1, 30, 40};
+ check(axis, expected);
+ }
+ { // boundary
+ int32_t axis = 2;
+ Shape expected{30, 40, 1};
+ check(axis, expected);
+ }
+ { // inside
+ int32_t axis = 1;
+ Shape expected{30, 1, 40};
+ check(axis, expected);
+ }
+ { // negative boundary
+ int32_t axis = -1;
+ Shape expected{30, 40, 1};
+ check(axis, expected);
+ }
+ { // negative boundary
+ int32_t axis = -3;
+ Shape expected{1, 30, 40};
+ check(axis, expected);
+ }
+}
+
+TEST(ShapeInference, neg_ExpandDims)
+{
+ Shape in_shape{30, 40};
+
+ { // over boundary
+ int32_t axis = 3;
+ ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
+ }
+ { // over boundary
+ int32_t axis = -4;
+ ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
+ }
+}
+
+TEST(ShapeInference, FullyConnected)
+{
+ Shape in_shape{3, 4, 5, 6};
+ Shape ker_shape{3, 10};
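+  // The input is presumably flattened to [num_elements / weight_cols, weight_cols]:
+  // 3 * 4 * 5 * 6 = 360 elements / 10 = 36 rows, with 3 output units.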
+ auto infered_out_shape = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape);
+
+ ASSERT_EQ(infered_out_shape.rank(), 2);
+ ASSERT_EQ(infered_out_shape.dim(0), 36);
+ ASSERT_EQ(infered_out_shape.dim(1), 3);
+}
+
+TEST(ShapeInference, Transpose)
+{
+ auto check = [&](Shape &in_shape, std::vector<int> perm, Shape &expected) {
+ // pre-conditions
+ ASSERT_EQ(in_shape.rank(), perm.size());
+ ASSERT_EQ(expected.rank(), perm.size());
+ auto inferred_out_shape =
+ onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
+ // post-conditions
+ ASSERT_EQ(inferred_out_shape.rank(), perm.size());
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ {
+ ASSERT_EQ(inferred_out_shape.dim(dim), expected.dim(dim));
+ }
+ };
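+  // Transposed output dim i equals input dim perm[i], e.g. perm {1, 0} maps {2, 3} to {3, 2}.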
+ // check for 2-D
+ {
+ Shape in_shape{2, 3};
+ std::vector<int> perm = {1, 0};
+ Shape expected{3, 2};
+ // int32_t rank = 2;
+ check(in_shape, perm, expected);
+ }
+ // check for 3-D
+ {
+ Shape in_shape{1, 2, 3};
+ std::vector<int> perm = {2, 0, 1};
+ Shape expected{3, 1, 2};
+ // int32_t rank = 3;
+ check(in_shape, perm, expected);
+ }
+ // check for 4-D
+ {
+ Shape in_shape{1, 2, 3, 4};
+ std::vector<int> perm = {1, 3, 0, 2};
+ Shape expected{2, 4, 1, 3};
+ // int32_t rank = 4;
+ check(in_shape, perm, expected);
+ }
+}
+
+TEST(ShapeInference, neg_Transpose)
+{
+ Shape in_shape{1, 2, 3};
+ // Invalid parameter size
+ {
+ std::vector<int> perm = {2, 0, 1, 0};
+ // int32_t rank = 3;
+ ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
+ std::runtime_error);
+ }
+ // Invalid parameter value
+ {
+ std::vector<int> perm = {2, 0, 3};
+ // int32_t rank = 3;
+ ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
+ std::runtime_error);
+ }
+}
+
+TEST(ShapeInference, Gather)
+{
+ auto check = [&](Shape &input, Shape &indices, Shape &expected, int32_t axis) {
+ int rank = input.rank();
+ auto actual = onert::shape_inference::inferGatherShape(input, indices, axis, rank);
+
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
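+  // Gather presumably replaces the 'axis' dimension of the input with the full indices shape,
+  // e.g. {3, 4} gathered at axis 0 with indices {1, 1, 2} gives {1, 1, 2, 4}.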
+ // check for 2-D, 3-D, axis 0
+ {
+ Shape input{3, 4};
+ Shape indices{1, 1, 2};
+ int32_t axis = 0;
+ Shape expected{1, 1, 2, 4};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 2-D, 3-D, axis 1
+ {
+ Shape input{3, 4};
+ Shape indices{1, 2, 1};
+ int32_t axis = 1;
+ Shape expected{3, 1, 2, 1};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 3-D, 2-D, axis 0
+ {
+ Shape input{2, 3, 4};
+ Shape indices{1, 2};
+ int32_t axis = 0;
+ Shape expected{1, 2, 3, 4};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 3-D, 2-D, axis 2
+ {
+ Shape input{2, 3, 4};
+ Shape indices{2, 1};
+ int32_t axis = 2;
+ Shape expected{2, 3, 2, 1};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 4D, axis 0
+ {
+ Shape input{1, 2, 3, 4};
+ Shape indices{2};
+ int32_t axis = 0;
+ Shape expected{2, 2, 3, 4};
+ check(input, indices, expected, axis);
+ }
+}
+
+TEST(ShapeInference, BCQFullyConnected)
+{
+ auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
+ Shape &expected) {
+ auto actual =
+ onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data());
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
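+  // The expected row count appears to be the sum of the per-cluster sizes (the second value of
+  // each pair in 'cluster'): 10 + 10 + 10 = 30 below, and 50 in the second case.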
+ {
+ Shape in_shape{10, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+
+ Shape expected{30, 1};
+ check(in_shape, cluster_shape, cluster, expected);
+ }
+
+ {
+ Shape in_shape{1, 1};
+ Shape cluster_shape{1, 2};
+ std::vector<int> cluster = {3, 50};
+
+ Shape expected{50, 1};
+ check(in_shape, cluster_shape, cluster, expected);
+ }
+}
+
+TEST(ShapeInference, BCQGather)
+{
+ auto check = [&](Shape &indices_shape, Shape &cluster_shape, std::vector<int> cluster,
+ uint32_t hidden_size, uint32_t axis, int rank, Shape &expected) {
+ operation::BCQGather::Param param{hidden_size, axis};
+ auto actual = onert::shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
+ cluster.data(), rank, param);
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
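+  // With axis 0, hidden_size (10) appears to be appended to the indices shape:
+  // {5, 1} -> {5, 1, 10}; with axis 1, the summed cluster size (30) leads: {30, 5, 1}.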
+ {
+ Shape indices_shape{5, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+ uint32_t hidden_size = 10;
+ uint32_t axis = 0;
+ int rank = 2;
+
+ Shape expected{5, 1, 10};
+ check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
+ }
+
+ {
+ Shape indices_shape{5, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+ uint32_t hidden_size = 10;
+ uint32_t axis = 1;
+ int rank = 2;
+
+ Shape expected{30, 5, 1};
+ check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
+ }
+}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/operation/BinaryArithmetic.h"
-#include "ir/verifier/Verifier.h"
-
-TEST(Graph, neg_inputs_and_outputs)
-{
- onert::ir::Graph graph;
-
- onert::ir::OperandIndex index0{0u};
- onert::ir::OperandIndex index1{1u};
-
- graph.addInput({index0});
- graph.addInput({index1});
-
- onert::ir::OperandIndex index10{10u};
- onert::ir::OperandIndex index11{11u};
- onert::ir::OperandIndex index12{12u};
-
- graph.addOutput({index10});
- graph.addOutput({index11});
- graph.addOutput({index12});
-
- ASSERT_EQ(graph.getInputs().size(), 2);
- ASSERT_EQ(graph.getOutputs().size(), 3);
-
- onert::ir::IOIndex io_index0{0};
- onert::ir::IOIndex io_index1{1};
- onert::ir::IOIndex io_index2{2};
-
- ASSERT_EQ(graph.getInputs().at(io_index0), 0);
- ASSERT_EQ(graph.getInputs().at(io_index1), 1);
-
- ASSERT_EQ(graph.getOutputs().at(io_index0), 10);
- ASSERT_EQ(graph.getOutputs().at(io_index1), 11);
- ASSERT_EQ(graph.getOutputs().at(io_index2), 12);
-
- EXPECT_THROW(graph.getOutputs().at(onert::ir::IOIndex{3}), std::out_of_range);
-}
-
-using namespace onert::ir;
-
-OperationIndex addAddOperation(Graph &graph, const OperandIndexSequence inputs,
- const OperandIndexSequence outputs)
-{
- // Add "ADD" operation
- operation::BinaryArithmetic::Param param;
- param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = Activation::NONE;
- return graph.addOperation(std::make_unique<operation::BinaryArithmetic>(inputs, outputs, param));
-}
-
-TEST(Graph, OneOpGraphFinish)
-{
- // Simple Graph with just one Add operation
-
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto lhs = graph.addOperand(shape, type);
- auto rhs = graph.addOperand(shape, type);
- auto res = graph.addOperand(shape, type);
-
- addAddOperation(graph, {lhs, rhs}, {res});
-
- // Set model inputs/outputs
- graph.addInput(lhs);
- graph.addInput(rhs);
- graph.addOutput(res);
-
- graph.finishBuilding();
-
- SUCCEED();
-}
-
-TEST(Graph, neg_InvalidGraphFinish_BadInput)
-{
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto in = graph.addOperand(shape, type);
- auto out = graph.addOperand(shape, type);
-
- // Set model inputs/outputs
- graph.addInput(in);
- graph.addOutput(out);
- graph.addInput(OperandIndex{89}); // Non-exisiting operand!
-
- EXPECT_ANY_THROW(graph.finishBuilding());
-}
-
-TEST(Graph, neg_InvalidGraphFinish_BadOutput)
-{
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto in = graph.addOperand(shape, type);
- auto out = graph.addOperand(shape, type);
-
- // Set model inputs/outputs
- graph.addInput(in);
- graph.addOutput(out);
- graph.addOutput(OperandIndex{12}); // Non-exisiting operand!
-
- EXPECT_ANY_THROW(graph.finishBuilding());
-}
-
-TEST(Graph, neg_InvalidGraphFinish_BadInputOutputForOp)
-{
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto lhs = graph.addOperand(shape, type);
- auto rhs = graph.addOperand(shape, type);
- auto res = graph.addOperand(shape, type);
-
- addAddOperation(graph, {lhs, OperandIndex{99}}, {res});
-
- // Set model inputs/outputs
- graph.addInput(lhs);
- graph.addInput(rhs);
- graph.addOutput(res);
-
- EXPECT_ANY_THROW(graph.finishBuilding());
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "util/Index.h"
-
-using Index = ::onert::util::Index<uint32_t, struct TestTag>;
-
-TEST(Index, neg_index_test)
-{
- Index idx1{1u};
- Index idx2{2u};
- Index idx3{idx1};
-
- ASSERT_EQ(idx1, 1);
- ASSERT_EQ(idx1, 1u);
- ASSERT_EQ(idx1.value(), 1u);
- ASSERT_NE(idx1, idx2);
- ASSERT_EQ(idx1, idx3);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_TEST_GRAPH_MOCK_NODE_H__
-#define __ONERT_TEST_GRAPH_MOCK_NODE_H__
-
-#include "ir/Operation.h"
-#include "ir/OperandIndexSequence.h"
-
-namespace onert_test
-{
-namespace ir
-{
-
-class SimpleMock : public onert::ir::Operation
-{
-public:
- SimpleMock(const onert::ir::OperandIndexSequence &inputs,
- const onert::ir::OperandIndexSequence &outputs)
- : Operation{onert::ir::OperandConstraint::createAny()}
- {
- setInputs(inputs);
- setOutputs(outputs);
- }
-
-public:
- void accept(onert::ir::OperationVisitor &) const override {}
- onert::ir::OpCode opcode() const final { return onert::ir::OpCode::Invalid; }
-};
-
-} // namespace ir
-} // namespace onert_test
-
-#endif // __ONERT_TEST_GRAPH_MOCK_NODE_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/OperandIndexSequence.h"
-
-using onert::ir::OperandIndex;
-using onert::ir::OperandIndexSequence;
-
-TEST(ir_OperandIndexSequence, neg_append)
-{
- OperandIndexSequence iset{0, 2, 4, 8};
-
- ASSERT_EQ(iset.size(), 4);
-
- iset.append(OperandIndex{10});
-
- ASSERT_EQ(iset.size(), 5);
-
- onert::ir::IOIndex index1{1};
- onert::ir::IOIndex index2{4};
-
- ASSERT_EQ(iset.at(index1), 2);
- ASSERT_EQ(iset.at(index2), 10);
-
- ASSERT_TRUE(iset.contains(OperandIndex{2}));
- ASSERT_TRUE(iset.contains(OperandIndex{10}));
- ASSERT_FALSE(iset.contains(OperandIndex{11}));
-}
-
-TEST(graph_OperandIndexSequence, neg_replace)
-{
- OperandIndexSequence iset{0, 1, 2, 3};
-
- iset.replace(OperandIndex{1}, OperandIndex{9});
- ASSERT_FALSE(iset.contains(OperandIndex{1}));
- ASSERT_TRUE(iset.contains(OperandIndex{9}));
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/LayoutSet.h"
-
-using onert::ir::Layout;
-using onert::ir::LayoutSet;
-
-TEST(ir_LayoutSet, neg_add_remove)
-{
- LayoutSet set{Layout::NCHW};
- set.remove(Layout::NHWC);
- ASSERT_EQ(set.size(), 1);
- set.add(Layout::NHWC);
- ASSERT_EQ(set.size(), 2);
- set.remove(Layout::NHWC);
- ASSERT_EQ(set.size(), 1);
- set.remove(Layout::NCHW);
- ASSERT_EQ(set.size(), 0);
- set.remove(Layout::NCHW);
- ASSERT_EQ(set.size(), 0);
-}
-
-TEST(ir_LayoutSet, set_operators)
-{
- LayoutSet set1{Layout::NCHW};
- LayoutSet set2{Layout::NHWC};
- LayoutSet set3 = set1 | set2;
-
- ASSERT_EQ(set3.size(), 2);
-
- ASSERT_EQ((set3 - set1).size(), 1);
- ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true);
- ASSERT_EQ((set3 - set2).size(), 1);
- ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true);
- ASSERT_EQ((set3 - set3).size(), 0);
-
- ASSERT_EQ((set3 & set1).size(), 1);
- ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true);
- ASSERT_EQ((set3 & set2).size(), 1);
- ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true);
- ASSERT_EQ((set1 & set2).size(), 0);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Operands.h"
-
-TEST(ir_Operands, neg_set_test)
-{
- onert::ir::Operands set;
-
- onert::ir::Shape shape0{1, 2, 3};
-
- onert::ir::Shape shape1(4);
- shape1.dim(0) = 10;
- shape1.dim(1) = 20;
- shape1.dim(2) = 30;
- shape1.dim(3) = 40;
-
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- set.emplace(shape0, type);
- set.emplace(shape1, type);
-
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true);
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true);
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false);
-
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1);
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2);
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/verifier/Verifier.h"
-#include <memory>
-#include "../MockNode.h"
-
-#include <typeindex>
-
-namespace
-{
-
-using IndexSet = onert::ir::OperandIndexSequence;
-using Mock = onert_test::ir::SimpleMock;
-
-} // namespace
-
-TEST(ir_Operand, neg_usedef)
-{
- onert::ir::Graph graph;
- onert::ir::verifier::DAGChecker verifier;
-
- onert::ir::Shape shape(3);
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- // Model Input/Output
- auto input_operand = graph.addOperand(shape, type);
- auto output_operand = graph.addOperand(shape, type);
-
- graph.addInput(input_operand);
- graph.addOutput(output_operand);
-
- // MockNode1
- auto operand_index1 = graph.addOperand(shape, type);
- auto mocknode_index1 =
- graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
-
- // MockNode2
- auto operand_index2 = graph.addOperand(shape, type);
- auto mocknode_index2 =
- graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
-
- // MockNode3(two input)
- auto multiinput_index = graph.addOperation(
- std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
-
- graph.finishBuilding();
-
- ASSERT_TRUE(verifier.verify(graph));
-
- // Check def
- ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1);
- ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2);
- ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index);
-
- ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2);
- ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index);
-
- // Check use
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true);
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false);
- ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true);
- ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true);
-
- ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2);
- ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1);
- ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "../MockNode.h"
-#include "ir/Operations.h"
-
-using onert::ir::Operation;
-using onert::ir::OperationIndex;
-using onert::ir::Operations;
-
-TEST(ir_Operations, basic)
-{
- Operations ops;
- ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
- OperationIndex idx{0u};
- ASSERT_EQ(ops.at(idx).getInputs().size(), 4);
- ASSERT_EQ(ops.at(idx).getOutputs().size(), 3);
-}
-
-TEST(ir_Operations, neg_at)
-{
- Operations ops;
- ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
- OperationIndex idx{99u};
- EXPECT_THROW(ops.at(idx), std::out_of_range);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/Index.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/operation/Conv2D.h"
-#include "ir/operation/Concat.h"
-
-#include <memory>
-
-#include <stdexcept>
-
-using Index = onert::ir::IOIndex;
-using IndexSet = onert::ir::OperandIndexSequence;
-
-TEST(ir_Operation_setIO, operation_setIO_conv)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- // Add Conv
- using Graph = onert::ir::operation::Conv2D;
-
- auto input_operand = graph.addOperand(shape, type);
- auto kernel_operand = graph.addOperand(shape, type);
- auto bias_operand = graph.addOperand(shape, type);
- IndexSet inputs{input_operand, kernel_operand, bias_operand};
-
- Graph::Param conv_params;
- conv_params.padding.type = onert::ir::PaddingType::SAME;
- conv_params.stride.horizontal = 1;
- conv_params.stride.vertical = 1;
- conv_params.activation = onert::ir::Activation::NONE;
-
- auto output_operand = graph.addOperand(shape, type).value();
- IndexSet outputs{output_operand};
-
- auto conv = std::make_unique<Graph>(inputs, outputs, conv_params);
-
- ASSERT_NE(conv, nullptr);
- ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
- conv->setInputs({8, 9, 10});
- ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
- ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8);
-}
-
-TEST(ir_Operation_setIO, neg_operation_setIO_concat)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
-
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- using Graph = onert::ir::operation::Concat;
-
- // Add Concat
- IndexSet inputs;
- for (int i = 0; i < 6; ++i)
- {
- inputs.append(graph.addOperand(shape, type));
- }
-
- Graph::Param concat_params{0};
-
- auto output_operand = graph.addOperand(shape, type).value();
- IndexSet outputs{output_operand};
-
- auto concat = std::make_unique<Graph>(inputs, outputs, concat_params);
-
- ASSERT_NE(concat, nullptr);
- ASSERT_EQ(concat->getInputs().size(), 6);
- ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
-
- concat->setInputs({80, 6, 9, 11});
- ASSERT_EQ(concat->getInputs().size(), 4);
- ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
- ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80);
- ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9);
- ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Operation.h"
-#include "ir/Graph.h"
-#include "ir/verifier/Verifier.h"
-#include <memory>
-#include "ir/Operand.h"
-#include "../MockNode.h"
-
-using IndexSet = onert::ir::OperandIndexSequence;
-using Mock = onert_test::ir::SimpleMock;
-
-TEST(Verifier, dag_checker)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- auto operand1 = graph.addOperand(shape, type);
- auto operand2 = graph.addOperand(shape, type);
-
- graph.addInput(operand1);
- graph.addOutput(operand2);
-
- graph.addOperation(std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}));
-
- graph.finishBuilding();
-
- onert::ir::verifier::DAGChecker verifier;
-
- ASSERT_TRUE(verifier.verify(graph));
-}
-
-TEST(Verifier, neg_edge_consistency_checker_1)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- auto operand1 = graph.addOperand(shape, type);
- auto operand2 = graph.addOperand(shape, type);
-
- graph.addInput(operand1);
- graph.addOutput(operand2);
-
- auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
- auto op_ind = graph.addOperation(std::move(mock_op));
-
- graph.finishBuilding();
-
- graph.operands().at(operand1).removeUse(op_ind); // Manipulate the operand alone
-
- onert::ir::verifier::EdgeConsistencyChecker verifier;
- ASSERT_FALSE(verifier.verify(graph));
-}
-
-TEST(Verifier, neg_edge_consistency_checker_2)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- auto operand1 = graph.addOperand(shape, type);
- auto operand2 = graph.addOperand(shape, type);
-
- graph.addInput(operand1);
- graph.addOutput(operand2);
-
- auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
- auto mock_op_ptr = mock_op.get();
- auto op_ind = graph.addOperation(std::move(mock_op));
-
- graph.finishBuilding();
-
- mock_op_ptr->setInputs({operand2}); // Manipulate the operation alone
-
- onert::ir::verifier::EdgeConsistencyChecker verifier;
- ASSERT_FALSE(verifier.verify(graph));
-}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <ir/Shape.h>
-
-#include <gtest/gtest.h>
-
-TEST(ShapeTest, basic_test)
-{
- {
- onert::ir::Shape shape(3);
-
- shape.dim(0) = 1;
- shape.dim(1) = 2;
- shape.dim(2) = 3;
-
- ASSERT_EQ(shape.rank(), 3);
- ASSERT_EQ(shape.num_elements(), 6);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
- ASSERT_EQ(shape.hasUnspecifiedDims(), false);
- }
- {
- onert::ir::Shape shape; // scalar or rank is unspecified
-
- ASSERT_EQ(shape.rank(), 0);
- ASSERT_EQ(shape.num_elements(), 1);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), true);
- ASSERT_EQ(shape.hasUnspecifiedDims(), false);
- }
-}
-
-TEST(ShapeTest, neg_basic_test)
-{
- {
- onert::ir::Shape shape(2);
-
- shape.dim(0) = 1;
- shape.dim(1) = onert::ir::Shape::UNSPECIFIED_DIM;
-
- ASSERT_EQ(shape.rank(), 2);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
- ASSERT_EQ(shape.hasUnspecifiedDims(), true);
- EXPECT_ANY_THROW(shape.num_elements());
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "util/ObjectManager.h"
-#include "util/Index.h"
-
-using namespace onert;
-
-struct TestTag;
-using Index = typename util::Index<uint32_t, TestTag>;
-
-TEST(ObjectManager, emplace)
-{
- util::ObjectManager<Index, int> man;
-
- auto index = man.emplace(100);
- ASSERT_EQ(man.at(index), 100);
-}
-
-TEST(ObjectManager, neg_remove_1)
-{
- util::ObjectManager<Index, int> man;
-
- Index index = man.emplace(100);
- ASSERT_TRUE(man.exist(index));
- ASSERT_EQ(man.at(index), 100);
-
- man.remove(index);
- ASSERT_FALSE(man.exist(index));
-}
-
-TEST(ObjectManager, neg_remove_2)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- ASSERT_TRUE(man.exist(index0));
- ASSERT_EQ(man.at(index0), 100);
- ASSERT_TRUE(man.exist(index1));
- ASSERT_EQ(man.at(index1), 200);
-
- man.remove(index0);
- ASSERT_FALSE(man.exist(index0));
- ASSERT_TRUE(man.exist(index1));
- ASSERT_EQ(man.at(index1), 200);
-}
-
-TEST(ObjectManager, push)
-{
- util::ObjectManager<Index, int> man;
-
- auto index = man.push(std::unique_ptr<int>{new int{100}});
- ASSERT_EQ(man.at(index), 100);
-}
-
-TEST(ObjectManager, const_iterate)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- auto index2 = man.emplace(300);
-
- int sum = 0;
- man.iterate([&](const Index &index, const int &val) { sum += val; });
- ASSERT_EQ(sum, 600);
-}
-
-TEST(ObjectManager, non_const_iterate)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- auto index2 = man.emplace(300);
-
- man.iterate([&](const Index &index, int &val) { val += 1; });
- ASSERT_EQ(man.at(index0), 101);
- ASSERT_EQ(man.at(index1), 201);
- ASSERT_EQ(man.at(index2), 301);
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Layout.h"
-#include "util/ShapeInference.h"
-
-using namespace onert::ir;
-
-TEST(ShapeInference, Elementwise)
-{
- Shape lhs_shape{1, 299, 299, 3};
- Shape rhs_shape{3};
- auto infered_out_shape = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.dim(0), 1);
- ASSERT_EQ(infered_out_shape.dim(1), 299);
- ASSERT_EQ(infered_out_shape.dim(2), 299);
- ASSERT_EQ(infered_out_shape.dim(3), 3);
-}
-
-TEST(ShapeInference, neg_Elementwise)
-{
- Shape lhs_shape{1, 299, 299, 3};
- Shape rhs_shape{5, 3};
- ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error);
-}
-
-TEST(ShapeInference, Pool2DNodeSame)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{3, 7};
- Padding padding{PaddingType::SAME};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, Pool2DNodeValid)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{3, 7};
- Padding padding{PaddingType::VALID};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, Pool2DNodeExplicit)
-{
- Shape in_shape{10, 3, 5, 20};
-
- Stride stride{3, 7};
- Padding padding{4, 3, 2, 1};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, neg_Pool2DNode_InvalidStride)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{0, 7};
- Padding padding{PaddingType::SAME};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
- std::runtime_error);
-}
-
-TEST(ShapeInference, Conv2D)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{30, 3, 6, 20};
-
- operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
- Dilation{1, 1}};
- auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-
- param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
- Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-
- param =
- operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-}
-
-TEST(ShapeInference, neg_Conv2D_InvalidStride)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{30, 3, 6, 20};
-
- operation::Conv2D::Param param{Stride{0, 0}, Padding{PaddingType::VALID}, Activation::NONE,
- Dilation{1, 1}};
- ASSERT_THROW(onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param),
- std::runtime_error);
-}
-
-TEST(ShapeInference, DepthwiseConv2D)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{1, 3, 6, 60};
-
- operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
- Activation::NONE, Dilation{1, 1}};
- auto infered_out_shape =
- onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-
- param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3,
- Activation::NONE, Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-
- param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE,
- Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-}
-
-TEST(ShapeInference, neg_DepthwiseConv2D_InvalidSride)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{1, 3, 6, 60};
-
- operation::DepthwiseConv2D::Param param{Stride{3, 0}, Padding{PaddingType::VALID}, 3,
- Activation::NONE, Dilation{1, 1}};
- ASSERT_THROW(onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param),
- std::runtime_error);
-}
-
-TEST(ShapeInference, Concat)
-{
- {
- Shape in1{10, 20, 30, 3, 50};
- Shape in2{10, 20, 30, 2, 50};
- Shape in3{10, 20, 30, 2, 50};
-
- operation::Concat::Param param{3};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2, in3}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 5);
- ASSERT_EQ(infered_out_shape.dim(0), 10);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 30);
- ASSERT_EQ(infered_out_shape.dim(3), 7);
- ASSERT_EQ(infered_out_shape.dim(4), 50);
- }
- {
- // case 1. when axis < 0
- Shape in1{10, 20, 2};
- Shape in2{10, 20, 3};
-
- operation::Concat::Param param{-1};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 3);
- ASSERT_EQ(infered_out_shape.dim(0), 10);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 5);
- }
- {
- // case 2. when axis < 0
- Shape in1{2, 20, 2};
- Shape in2{3, 20, 2};
-
- operation::Concat::Param param{-3};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 3);
- ASSERT_EQ(infered_out_shape.dim(0), 5);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 2);
- }
-}
-
-TEST(ShapeInference, neg_Concat)
-{
- {
- operation::Concat::Param param{2};
- Shape in1{10, 1, 3};
- Shape in2{10, 2, 4}; // dim[1] should be 1 but 2
-
- EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
- }
- { // wrong rank
- operation::Concat::Param param{2};
- Shape in1{10, 2, 3, 4};
- Shape in2{10, 2, 4}; // rank should be 4
-
- EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
- }
-}
-
-TEST(ShapeInference, ExpandDims)
-{
- Shape in_shape{30, 40};
-
- auto check = [&](int32_t axis, Shape &expected) {
- auto actual = onert::shape_inference::inferExpandDimsShape(in_shape, axis);
-
- ASSERT_EQ(actual.rank(), 3);
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- { // boundary
- int32_t axis = 0;
- Shape expected{1, 30, 40};
- check(axis, expected);
- }
- { // boundary
- int32_t axis = 2;
- Shape expected{30, 40, 1};
- check(axis, expected);
- }
- { // inside
- int32_t axis = 1;
- Shape expected{30, 1, 40};
- check(axis, expected);
- }
- { // negative boundary
- int32_t axis = -1;
- Shape expected{30, 40, 1};
- check(axis, expected);
- }
- { // negative boundary
- int32_t axis = -3;
- Shape expected{1, 30, 40};
- check(axis, expected);
- }
-}
-
-TEST(ShapeInference, neg_ExpandDims)
-{
- Shape in_shape{30, 40};
-
- { // over boundary
- int32_t axis = 3;
- ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
- }
- { // over boundary
- int32_t axis = -4;
- ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
- }
-}
-
-TEST(ShapeInference, FullyConnected)
-{
- Shape in_shape{3, 4, 5, 6};
- Shape ker_shape{3, 10};
- auto infered_out_shape = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape);
-
- ASSERT_EQ(infered_out_shape.rank(), 2);
- ASSERT_EQ(infered_out_shape.dim(0), 36);
- ASSERT_EQ(infered_out_shape.dim(1), 3);
-}
-
-TEST(ShapeInference, Transpose)
-{
- auto check = [&](Shape &in_shape, std::vector<int> perm, Shape &expected) {
- // pre-conditions
- ASSERT_EQ(in_shape.rank(), perm.size());
- ASSERT_EQ(expected.rank(), perm.size());
- auto inferred_out_shape =
- onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
- // post-conditions
- ASSERT_EQ(inferred_out_shape.rank(), perm.size());
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- {
- ASSERT_EQ(inferred_out_shape.dim(dim), expected.dim(dim));
- }
- };
- // check for 2-D
- {
- Shape in_shape{2, 3};
- std::vector<int> perm = {1, 0};
- Shape expected{3, 2};
- // int32_t rank = 2;
- check(in_shape, perm, expected);
- }
- // check for 3-D
- {
- Shape in_shape{1, 2, 3};
- std::vector<int> perm = {2, 0, 1};
- Shape expected{3, 1, 2};
- // int32_t rank = 3;
- check(in_shape, perm, expected);
- }
- // check for 4-D
- {
- Shape in_shape{1, 2, 3, 4};
- std::vector<int> perm = {1, 3, 0, 2};
- Shape expected{2, 4, 1, 3};
- // int32_t rank = 4;
- check(in_shape, perm, expected);
- }
-}
-
-TEST(ShapeInference, neg_Transpose)
-{
- Shape in_shape{1, 2, 3};
- // Invalid parameter size
- {
- std::vector<int> perm = {2, 0, 1, 0};
- // int32_t rank = 3;
- ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
- std::runtime_error);
- }
- // Invalid parameter value
- {
- std::vector<int> perm = {2, 0, 3};
- // int32_t rank = 3;
- ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
- std::runtime_error);
- }
-}
-
-TEST(ShapeInference, Gather)
-{
- auto check = [&](Shape &input, Shape &indices, Shape &expected, int32_t axis) {
- int rank = input.rank();
- auto actual = onert::shape_inference::inferGatherShape(input, indices, axis, rank);
-
- ASSERT_EQ(actual.rank(), expected.rank());
-
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- // check for 2-D, 3-D, axis 0
- {
- Shape input{3, 4};
- Shape indices{1, 1, 2};
- int32_t axis = 0;
- Shape expected{1, 1, 2, 4};
- check(input, indices, expected, axis);
- }
-
- // check for 2-D, 3-D, axis 1
- {
- Shape input{3, 4};
- Shape indices{1, 2, 1};
- int32_t axis = 1;
- Shape expected{3, 1, 2, 1};
- check(input, indices, expected, axis);
- }
-
- // check for 3-D, 2-D, axis 0
- {
- Shape input{2, 3, 4};
- Shape indices{1, 2};
- int32_t axis = 0;
- Shape expected{1, 2, 3, 4};
- check(input, indices, expected, axis);
- }
-
- // check for 3-D, 2-D, axis 2
- {
- Shape input{2, 3, 4};
- Shape indices{2, 1};
- int32_t axis = 2;
- Shape expected{2, 3, 2, 1};
- check(input, indices, expected, axis);
- }
-
- // check for 4D, axis 0
- {
- Shape input{1, 2, 3, 4};
- Shape indices{2};
- int32_t axis = 0;
- Shape expected{2, 2, 3, 4};
- check(input, indices, expected, axis);
- }
-}
-
-TEST(ShapeInference, BCQFullyConnected)
-{
- auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
- Shape &expected) {
- auto actual =
- onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data());
- ASSERT_EQ(actual.rank(), expected.rank());
-
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- {
- Shape in_shape{10, 1};
- Shape cluster_shape{3, 2};
- std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
-
- Shape expected{30, 1};
- check(in_shape, cluster_shape, cluster, expected);
- }
-
- {
- Shape in_shape{1, 1};
- Shape cluster_shape{1, 2};
- std::vector<int> cluster = {3, 50};
-
- Shape expected{50, 1};
- check(in_shape, cluster_shape, cluster, expected);
- }
-}
-
-TEST(ShapeInference, BCQGather)
-{
- auto check = [&](Shape &indices_shape, Shape &cluster_shape, std::vector<int> cluster,
- uint32_t hidden_size, uint32_t axis, int rank, Shape &expected) {
- operation::BCQGather::Param param{hidden_size, axis};
- auto actual = onert::shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
- cluster.data(), rank, param);
- ASSERT_EQ(actual.rank(), expected.rank());
-
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- {
- Shape indices_shape{5, 1};
- Shape cluster_shape{3, 2};
- std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
- uint32_t hidden_size = 10;
- uint32_t axis = 0;
- int rank = 2;
-
- Shape expected{5, 1, 10};
- check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
- }
-
- {
- Shape indices_shape{5, 1};
- Shape cluster_shape{3, 2};
- std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
- uint32_t hidden_size = 10;
- uint32_t axis = 1;
- int rank = 2;
-
- Shape expected{30, 5, 1};
- check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
- }
-}
+++ /dev/null
-../.clang-format.8
\ No newline at end of file
GeneratedTests.fill_ex_4D_float
GeneratedTests.fill_ex_dynamic_nnfw
GeneratedTests.fully_connected_dynamic_nnfw
+GeneratedTests.fully_connected_float_2_weights_as_inputs
GeneratedTests.fully_connected_hybrid_1_nnfw
GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
GeneratedTests.fill_ex_4D_float
GeneratedTests.fill_ex_dynamic_nnfw
GeneratedTests.fully_connected_dynamic_nnfw
+GeneratedTests.fully_connected_float_2_weights_as_inputs
GeneratedTests.fully_connected_hybrid_1_nnfw
GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
GeneratedTests.fill_ex_4D_float
GeneratedTests.fill_ex_dynamic_nnfw
GeneratedTests.fully_connected_dynamic_nnfw
+GeneratedTests.fully_connected_float_2_weights_as_inputs
GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
GeneratedTests.gather_dynamic_nnfw
target_link_libraries(${RUNTIME_NNFW_API_TEST} circle_schema)
install(TARGETS ${RUNTIME_NNFW_API_TEST} DESTINATION unittest_standalone)
+
+# Install nnpackage test model (add)
+set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/nnpackage/examples/v1.0.0/add)
+set(NNPACKAGE_INSTALL_TARGET unittest_standalone/nnfw_api_gtest_models)
+
+install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/add)
+
+# Install nnpackage test model (add_no_manifest)
+set(NNPACKAGE_MODEL ${NNPACKAGE_MODEL_DIR}/add.tflite)
+install(FILES ${NNPACKAGE_MODEL} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/add_no_manifest/add_no_manifest)
+
+# Install nnpackage test model (add_invalid_manifest)
+set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/nnpackage/examples/v1.0.0/add_invalid_manifest)
+install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/add_invalid_manifest)
+
+# Install nnpackage test model (if_dynamic)
+set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/nnpackage/examples/v1.0.0/if_dynamic)
+install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/if_dynamic)
+
+# Install nnpackage test model (while_dynamic)
+set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/nnpackage/examples/v1.0.0/while_dynamic)
+install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/while_dynamic)
return ind;
}
+uint32_t CircleGen::addTensor(const TensorParams ¶ms, std::vector<float> &scale,
+ std::vector<int64_t> &zero_point)
+{
+ uint32_t ind = curSubgCtx().tensors.size();
+ curSubgCtx().tensors.emplace_back(buildTensor(params, scale, zero_point));
+ return ind;
+}
+
uint32_t CircleGen::addTensor(const TensorParams ¶ms, const SparsityParams &sp)
{
uint32_t ind = curSubgCtx().tensors.size();
circle::BuiltinOptions_ReducerOptions, options);
}
+uint32_t CircleGen::addOperatorMul(const OperatorParams ¶ms,
+ circle::ActivationFunctionType actfn)
+{
+ auto options = circle::CreateMulOptions(_fbb, actfn).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_MUL,
+ circle::BuiltinOptions_MulOptions, options);
+}
+
uint32_t CircleGen::addOperatorNeg(const OperatorParams ¶ms)
{
auto options = circle::CreatePadOptions(_fbb).Union();
circle::BuiltinOptions_PadV2Options, options);
}
+uint32_t CircleGen::addOperatorQuantize(const OperatorParams ¶ms)
+{
+ auto options = circle::CreateQuantizeOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_QUANTIZE,
+ circle::BuiltinOptions_QuantizeOptions, options);
+}
+
uint32_t CircleGen::addOperatorRank(const OperatorParams ¶ms)
{
auto options = circle::CreateRankOptions(_fbb).Union();
circle::BuiltinOptions_SelectV2Options, options);
}
+uint32_t CircleGen::addOperatorSlice(const OperatorParams ¶ms)
+{
+ auto options = circle::CreateSliceOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SLICE,
+ circle::BuiltinOptions_SliceOptions, options);
+}
+
+uint32_t CircleGen::addOperatorSoftmax(const OperatorParams ¶ms, float beta)
+{
+ auto options = circle::CreateSoftmaxOptions(_fbb, beta).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SOFTMAX,
+ circle::BuiltinOptions_SoftmaxOptions, options);
+}
+
uint32_t CircleGen::addOperatorSplit(const OperatorParams ¶ms, int32_t num_split)
{
auto options = circle::CreateSplitOptions(_fbb, num_split).Union();
return addOperatorWithOptions(params, circle::BuiltinOperator_SPLIT,
circle::BuiltinOptions_SplitOptions, options);
}
+
uint32_t CircleGen::addOperatorStridedSlice(const OperatorParams ¶ms, int32_t begin_mask,
int32_t end_mask, int32_t ellipsis_mask,
int32_t new_axis_mask, int32_t shrink_axis_mask)
return addOperatorWithOptions(params, circle::BuiltinOperator_STRIDED_SLICE,
circle::BuiltinOptions_StridedSliceOptions, options);
}
+
+uint32_t CircleGen::addOperatorSub(const OperatorParams ¶ms,
+ circle::ActivationFunctionType actfn)
+{
+ auto options = circle::CreateSubOptions(_fbb, actfn).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SUB,
+ circle::BuiltinOptions_SubOptions, options);
+}
+
uint32_t CircleGen::addOperatorTile(const OperatorParams ¶ms)
{
auto options = circle::CreateTileOptions(_fbb).Union();
false /* is_variable */, 0 /* sparsity */, 0 /* shape_signature */);
}
+flatbuffers::Offset<circle::Tensor> CircleGen::buildTensor(const TensorParams ¶ms,
+ std::vector<float> &scales,
+ std::vector<int64_t> &zero_points)
+{
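+  // Quantization parameters given as vectors, typically one scale/zero-point pair per
+  // channel (per-channel quantization).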
+ auto shape = _fbb.CreateVector(params.shape);
+ auto name = _fbb.CreateString(params.name);
+ auto quantization =
+ circle::CreateQuantizationParametersDirect(_fbb, nullptr, nullptr, &scales, &zero_points);
+ return circle::CreateTensor(_fbb, shape, params.tensor_type, params.buffer, name, quantization,
+ false /* is_variable */, 0 /* sparsity */, 0 /* shape_signature */);
+}
+
flatbuffers::Offset<circle::SparsityParameters>
CircleGen::buildSparsityParameters(const SparsityParams &sp)
{
uint32_t addBuffer(const uint8_t *buf, size_t size);
uint32_t addTensor(const TensorParams ¶ms);
uint32_t addTensor(const TensorParams ¶ms, float scale, int64_t zero_point);
+ uint32_t addTensor(const TensorParams ¶ms, std::vector<float> &scale,
+ std::vector<int64_t> &zero_point);
uint32_t addTensor(const TensorParams ¶ms, const SparsityParams &sp);
void setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs);
uint32_t nextSubgraph();
uint32_t addOperatorLeakyRelu(const OperatorParams ¶ms, float alpha);
uint32_t addOperatorLess(const OperatorParams ¶ms);
uint32_t addOperatorLogSoftmax(const OperatorParams ¶ms);
+ uint32_t addOperatorMul(const OperatorParams ¶ms, circle::ActivationFunctionType actfn);
uint32_t addOperatorMean(const OperatorParams ¶ms, bool keep_dims);
uint32_t addOperatorNeg(const OperatorParams ¶ms);
uint32_t addOperatorOneHot(const OperatorParams ¶ms, int32_t axis);
uint32_t addOperatorPad(const OperatorParams ¶ms);
uint32_t addOperatorPadV2(const OperatorParams ¶ms);
+ uint32_t addOperatorQuantize(const OperatorParams ¶ms);
uint32_t addOperatorRank(const OperatorParams ¶ms);
uint32_t addOperatorReduce(const OperatorParams ¶ms, circle::BuiltinOperator reduce_op,
bool keep_dims);
circle::TensorType type = circle::TensorType::TensorType_INT32);
uint32_t addOperatorSelect(const OperatorParams ¶ms);
uint32_t addOperatorSelectV2(const OperatorParams ¶ms);
+ uint32_t addOperatorSlice(const OperatorParams ¶ms);
+ uint32_t addOperatorSoftmax(const OperatorParams ¶ms, float beta);
uint32_t addOperatorSplit(const OperatorParams ¶ms, int32_t num_split);
uint32_t addOperatorSqrt(const OperatorParams ¶ms);
uint32_t addOperatorSquare(const OperatorParams ¶ms);
uint32_t addOperatorStridedSlice(const OperatorParams ¶ms, int32_t begin_mask = 0,
int32_t end_mask = 0, int32_t ellipsis_mask = 0,
int32_t new_axis_mask = 0, int32_t shrink_axis_mask = 0);
+ uint32_t addOperatorSub(const OperatorParams ¶ms, circle::ActivationFunctionType actfn);
uint32_t addOperatorTile(const OperatorParams ¶ms);
uint32_t addOperatorTranspose(const OperatorParams ¶ms);
uint32_t addOperatorWhile(const OperatorParams ¶ms, uint32_t cond_subg, uint32_t body_subg);
flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams ¶ms);
flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams ¶ms, float scale,
int64_t zero_point);
+ flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams ¶ms,
+ std::vector<float> &scales,
+ std::vector<int64_t> &zero_points);
flatbuffers::Offset<circle::SparsityParameters> buildSparsityParameters(const SparsityParams &sp);
flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams ¶ms,
const SparsityParams &sp);
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "GRAPH_DOT_DUMP", "0"));
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "GRAPH_DOT_DUMP", "1"));
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "GRAPH_DOT_DUMP", "2"));
- NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "OP_SEQ_MAX_NODE", "0"));
- NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "OP_SEQ_MAX_NODE", "1"));
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "EXECUTOR", "Linear"));
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "OP_BACKEND_ALLOPS", "cpu"));
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "USE_SCHEDULER", "0"));
*/
#include "fixtures.h"
+#include "one_op_tests/WhileTestModel.h"
TEST_F(ValidationTestTwoSessions, neg_two_sessions_create)
{
CircleBuffer cbuf;
};
-TEST_F(ValidationTestTwoSessionsCreated, two_sessions_run_simple_model)
+TEST_F(ValidationTestTwoSessionsCreated, two_sessions_run_simple_AveragePool_model)
{
constexpr int N = 64, H = 64, W = 64, C = 3;
AveragePoolModel model(N, H, W, C);
SUCCEED();
}
+TEST_F(ValidationTestTwoSessionsCreated, neg_two_sessions_model_load)
+{
+ constexpr int N = 64, H = 64, W = 64, C = 3;
+ AveragePoolModel model(N, H, W, C);
+
+ NNFW_ENSURE_SUCCESS(
+ nnfw_load_circle_from_buffer(_session1, model.cbuf.buffer(), model.cbuf.size()));
+ ASSERT_EQ(nnfw_load_circle_from_buffer(nullptr, model.cbuf.buffer(), model.cbuf.size()),
+ NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestTwoSessionsCreated, two_sessions_run_simple_While_model)
+{
+ WhileModelLoop10 model;
+
+ NNFW_ENSURE_SUCCESS(
+ nnfw_load_circle_from_buffer(_session1, model.cbuf.buffer(), model.cbuf.size()));
+ NNFW_ENSURE_SUCCESS(
+ nnfw_load_circle_from_buffer(_session2, model.cbuf.buffer(), model.cbuf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session1, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session2, "cpu"));
+
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session2));
+
+ std::vector<float> in_buf1(model.inputCount()); // any value
+ std::vector<float> out_buf1(model.outputputCount());
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_input(_session1, 0, NNFW_TYPE_TENSOR_FLOAT32, in_buf1.data(),
+ in_buf1.size() * model.sizeOfDType()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_output(_session1, 0, NNFW_TYPE_TENSOR_FLOAT32, out_buf1.data(),
+ out_buf1.size() * model.sizeOfDType()));
+
+ std::vector<float> in_buf2(model.inputCount()); // any value
+ std::vector<float> out_buf2(model.outputputCount());
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_input(_session2, 0, NNFW_TYPE_TENSOR_FLOAT32, in_buf2.data(),
+ in_buf2.size() * model.sizeOfDType()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_output(_session2, 0, NNFW_TYPE_TENSOR_FLOAT32, out_buf2.data(),
+ out_buf2.size() * model.sizeOfDType()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session2));
+
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session2));
+
+ SUCCEED();
+}
+
// TODO Write two-session-test with large models run by threads
ValidationTestSessionCreated::SetUp();
if (PackageNo == NNPackages::ADD)
{
- auto cbuf = genAddModel();
- NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_session, cbuf.buffer(), cbuf.size()));
+ // NOTE the circle buffer must be kept until finishing the test, so keep it as a member
+ _cbuf = genAddModel();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_session, _cbuf.buffer(), _cbuf.size()));
}
else
{
}
void TearDown() override { ValidationTestSessionCreated::TearDown(); }
+
+private:
+ CircleBuffer _cbuf; // Used only for models from buffer, unused for models from files
};
template <int PackageNo>
auto cbuf = genAddModel();
NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(obj.session, cbuf.buffer(), cbuf.size()));
ASSERT_EQ(nnfw_prepare(obj.session), NNFW_STATUS_NO_ERROR);
+ _cbufs.push_back(std::move(cbuf)); // Keep the buffer so it can outlive the session
uint32_t num_inputs;
ASSERT_EQ(nnfw_input_size(obj.session, &num_inputs), NNFW_STATUS_NO_ERROR);
protected:
std::array<SessionObject, NUM_SESSIONS> _objects;
+ std::vector<CircleBuffer> _cbufs;
};
class ValidationTestTwoSessions : public ValidationTest
SUCCEED();
}
+TEST_F(GenModelTest, OneOp_Add_VarToVarInt8)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
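+  // Expected outputs follow the affine quantization rule: dequantize each input as
+  // (q - zero_point) * scale, add, then requantize as q = real / 0.5 + (-6).
+  // For example, the first element: (1 - 2) * 1.0 + (5 - 3) * 2.0 = 3.0 -> 3.0 / 0.5 - 6 = 0.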
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_BroadcastAdd_VarToVarInt8)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
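+  // Broadcasting: the single rhs element, real value (5 - 3) * 2.0 = 4,
+  // is added to every lhs element.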
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5}}, {{0, 4, 2, 6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
TEST_F(GenModelTest, OneOp_Add_VarToVarSame)
{
CircleGen cgen;
SUCCEED();
}
+TEST_F(GenModelTest, neg_OneOp_Add_DifferentQuant8Type)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.2, -3);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_INT8});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
TEST_F(GenModelTest, neg_OneOp_Add_InvalidShape)
{
CircleGen cgen;
SUCCEED();
}
+
+TEST_F(GenModelTest, neg_OneOp_Add_VarToVarInt16)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 1., 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ // _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
#include "GenModelTest.h"
-TEST_F(GenModelTest, OneOp_AvgPool2D)
+struct AvgPool2DParam
{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}}, {{2.5}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
+ TestCaseData tcd;
+ std::vector<int32_t> input_shape;
+ std::vector<int32_t> output_shape;
+ struct filter_stride
+ {
+ int32_t filter_w;
+ int32_t filter_h;
+ int32_t stride_w;
+ int32_t stride_h;
+ } param = {1, 1, 1, 1};
+ struct data_type
+ {
+ circle::TensorType data_type;
+ float scale;
+ int64_t zero_point;
+ } type = {circle::TensorType::TensorType_FLOAT32, 0.0f, 0};
+ std::vector<std::string> backend = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class AveragePool2DVariation : public GenModelTest,
+ public ::testing::WithParamInterface<AvgPool2DParam>
+{
+};
-TEST_F(GenModelTest, OneOp_AvgPool2D_Large)
+TEST_P(AveragePool2DVariation, Test)
{
+ auto ¶m = GetParam();
CircleGen cgen;
- int in = cgen.addTensor({{1, 16, 32, 2}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 16, 16, 16, 16,
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+ param.param.stride_h, param.param.filter_w, param.param.filter_h,
circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({std::vector<float>(1024, 99)}, {{99, 99, 99, 99}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backend);
SUCCEED();
}
+// Test with different input types and values
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, AveragePool2DVariation,
+ ::testing::Values(
+ // float data
+ AvgPool2DParam{
+ uniformTCD<float>({{1, 3, 2, 4}}, {{2.5}}), {1, 2, 2, 1}, {1, 1, 1, 1}, {2, 2, 2, 2}},
+ // float data - large
+ AvgPool2DParam{uniformTCD<float>({std::vector<float>(18 * 36 * 2, 99)}, {{99, 99, 99, 99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18}},
+ // uint8_t data
+ AvgPool2DParam{uniformTCD<uint8_t>({{2, 6, 4, 8}}, {{5}}),
+ {1, 2, 2, 1},
+ {1, 1, 1, 1},
+ {2, 2, 2, 2},
+ {circle::TensorType::TensorType_UINT8, 1.2, 3}},
+ // uint8_t data -large
+ AvgPool2DParam{
+ uniformTCD<uint8_t>({{std::vector<uint8_t>(18 * 36 * 2, 99)}}, {{99, 99, 99, 99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18},
+ {circle::TensorType::TensorType_UINT8, 1.2, 3}},
+ // int8_t data
+ // TODO enable acl-cl, acl-neon backend
+ AvgPool2DParam{uniformTCD<int8_t>({{2, -6, 4, -8}}, {{-2}}),
+ {1, 2, 2, 1},
+ {1, 1, 1, 1},
+ {2, 2, 2, 2},
+ {circle::TensorType::TensorType_INT8, 2.0, -1},
+ {"cpu"}},
+ // int8_t data - large
+ // TODO enable acl-cl, acl-neon backend
+ AvgPool2DParam{
+ uniformTCD<int8_t>({{std::vector<int8_t>(18 * 36 * 2, -99)}}, {{-99, -99, -99, -99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18},
+ {circle::TensorType::TensorType_INT8, 2.0, -1},
+ {"cpu"}}));
+
TEST_F(GenModelTest, neg_OneOp_AvgPool2D_3DInput)
{
// 3D Tensors are not supported
SUCCEED();
}
+TEST_F(GenModelTest, OneOp_Conv2D_I8)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ int weight =
+ cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 38, 61}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_I8_PerChannel)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 1, 2, 3, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
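+  // Per-channel weight quantization: one scale/zero-point pair per output channel
+  // (the first dimension of the weight shape).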
+ std::vector<float> weight_scales = {0.5, 1, 0.5};
+ std::vector<int64_t> weight_zeropoints = {0, 0, 0};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 30, 60}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
TEST_F(GenModelTest, neg_OneOp_Conv2D_Type)
{
CircleGen cgen;
SUCCEED();
}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoint)
+{
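+  // int8 weights are expected to be symmetrically quantized (zero point 0),
+  // so the non-zero weight zero point below should be rejected at model load.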
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ int weight =
+ cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 17);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoints)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ std::vector<float> weight_scales = {0.5, 1, 0.5};
+ std::vector<int64_t> weight_zeropoints = {0, 0, 10};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
return cgen.finish();
}
-CircleBuffer genSimpleDepthwiseConv2DQuantizedModel(int stride, int input_depth,
- int depth_multiplier)
+template <typename T> struct DepthwiseConv2DQuantTestParam
+{
+ int stride = 1; // Used for both height and width
+ int input_depth = 1;
+ int depth_multiplier = 1;
+ std::vector<T> ref_output;
+};
+
+template <typename T>
+class DepthwiseConv2DQuantTest
+ : public GenModelTest,
+ public ::testing::WithParamInterface<DepthwiseConv2DQuantTestParam<T>>
+{
+};
+
+using DepthwiseConv2DQuantTestParamU8 = DepthwiseConv2DQuantTestParam<uint8_t>;
+using DepthwiseConv2DQuantTestU8 = DepthwiseConv2DQuantTest<uint8_t>;
+
+CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier)
{
assert(1 <= stride && stride <= 2);
assert(1 <= input_depth && input_depth <= 16);
return cgen.finish();
}
-struct DepthwiseConv2DVariationParam
-{
- int stride = 1; // Used for both height and width
- int input_depth = 1;
- int depth_multiplier = 1;
- std::vector<uint8_t> ref_output;
-};
-
-class DepthwiseConv2DVariation : public GenModelTest,
- public ::testing::WithParamInterface<DepthwiseConv2DVariationParam>
-{
-};
-
-TEST_P(DepthwiseConv2DVariation, Test)
+TEST_P(DepthwiseConv2DQuantTestU8, Test)
{
// Same input is used for all tests but output differs
static const std::vector<uint8_t> input64{
2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
auto ¶m = GetParam();
- _context = std::make_unique<GenModelTestContext>(genSimpleDepthwiseConv2DQuantizedModel(
- param.stride, param.input_depth, param.depth_multiplier));
+ _context = std::make_unique<GenModelTestContext>(
+ genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier));
std::vector<uint8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
_context->addTestCase(uniformTCD<uint8_t>({ref_input}, {param.ref_output}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
// kernels.
INSTANTIATE_TEST_CASE_P(
- GenModelTest, DepthwiseConv2DVariation,
+ GenModelTest, DepthwiseConv2DQuantTestU8,
::testing::Values(
// Stride == 1
- DepthwiseConv2DVariationParam{1, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DVariationParam{1, 4, 2, std::vector<uint8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
- DepthwiseConv2DVariationParam{
+ DepthwiseConv2DQuantTestParamU8{1, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamU8{1, 4, 2, std::vector<uint8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
+ DepthwiseConv2DQuantTestParamU8{
1, 2, 8, std::vector<uint8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
- DepthwiseConv2DVariationParam{1, 2, 2, std::vector<uint8_t>{0, 1, 4, 6}},
- DepthwiseConv2DVariationParam{1, 2, 1, std::vector<uint8_t>{2, 5}},
- DepthwiseConv2DVariationParam{1, 1, 2, std::vector<uint8_t>{2, 4}},
- DepthwiseConv2DVariationParam{1, 1, 4, std::vector<uint8_t>{0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{1, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
- DepthwiseConv2DVariationParam{
+ DepthwiseConv2DQuantTestParamU8{1, 2, 2, std::vector<uint8_t>{0, 1, 4, 6}},
+ DepthwiseConv2DQuantTestParamU8{1, 2, 1, std::vector<uint8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamU8{1, 1, 2, std::vector<uint8_t>{2, 4}},
+ DepthwiseConv2DQuantTestParamU8{1, 1, 4, std::vector<uint8_t>{0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{1, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamU8{
1, 4, 4, std::vector<uint8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
- DepthwiseConv2DVariationParam{1, 12, 1,
- std::vector<uint8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
+ DepthwiseConv2DQuantTestParamU8{1, 12, 1,
+ std::vector<uint8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
// Stride == 2
- DepthwiseConv2DVariationParam{2, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
- DepthwiseConv2DVariationParam{2, 2, 1, std::vector<uint8_t>{2, 5}},
- DepthwiseConv2DVariationParam{2, 1, 8, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{2, 1, 32, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
- 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
- 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{
+ DepthwiseConv2DQuantTestParamU8{2, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamU8{2, 2, 1, std::vector<uint8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamU8{2, 1, 8, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{2, 1, 32, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
+ 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
+ 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{
2, 1, 20, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{
+ DepthwiseConv2DQuantTestParamU8{
2, 1, 16, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{2, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DVariationParam{
+ DepthwiseConv2DQuantTestParamU8{2, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamU8{
2, 8, 2, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DVariationParam{
+ DepthwiseConv2DQuantTestParamU8{
2, 16, 1, std::vector<uint8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
+using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam<int8_t>;
+using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest<int8_t>;
+
+CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier)
+{
+ assert(1 <= stride && stride <= 2);
+ assert(1 <= input_depth && input_depth <= 16);
+ assert(1 <= depth_multiplier && depth_multiplier <= 32);
+
+ const int output_depth = input_depth * depth_multiplier;
+ assert(1 <= output_depth && output_depth <= 32);
+
+ CircleGen cgen;
+ uint32_t ker_buf = cgen.addBuffer(std::vector<int8_t>{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
+ uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
+ int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0);
+ int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0);
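+  // Bias scale is input_scale * kernel_scale (0.5 * 0.5 = 0.25),
+  // as is conventional for quantized convolution bias.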
+ int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
+ int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
+ stride, depth_multiplier, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_P(DepthwiseConv2DQuantTestI8, Test)
+{
+ // Same input is used for all tests but output differs
+ static const std::vector<int8_t> input64{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
+ 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
+
+ auto ¶m = GetParam();
+ _context = std::make_unique<GenModelTestContext>(
+ genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier));
+ std::vector<int8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
+ _context->addTestCase(uniformTCD<int8_t>({ref_input}, {param.ref_output}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
+// kernels.
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, DepthwiseConv2DQuantTestI8,
+ ::testing::Values(
+ // Stride == 1
+ DepthwiseConv2DQuantTestParamI8{1, 8, 1, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{1, 4, 2, std::vector<int8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
+ DepthwiseConv2DQuantTestParamI8{
+ 1, 2, 8, std::vector<int8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
+ DepthwiseConv2DQuantTestParamI8{1, 2, 2, std::vector<int8_t>{0, 1, 4, 6}},
+ DepthwiseConv2DQuantTestParamI8{1, 2, 1, std::vector<int8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamI8{1, 1, 2, std::vector<int8_t>{2, 4}},
+ DepthwiseConv2DQuantTestParamI8{1, 1, 4, std::vector<int8_t>{0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{1, 4, 1, std::vector<int8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamI8{
+ 1, 4, 4, std::vector<int8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
+ DepthwiseConv2DQuantTestParamI8{1, 12, 1,
+ std::vector<int8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
+ // Stride == 2
+ DepthwiseConv2DQuantTestParamI8{2, 4, 1, std::vector<int8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamI8{2, 2, 1, std::vector<int8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 1, 8, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 1, 32, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
+ 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
+ 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 1, 20, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 1, 16, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 8, 1, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 8, 2, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 16, 1, std::vector<int8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
+
TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_InvalidPaddingType)
{
_context = std::make_unique<GenModelTestContext>(genNegTestDepthwiseConv2DModel(
}
// TODO add other invalid operation tests like above
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_I8_NonZero_ZeroPoints)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ std::vector<float> weight_scales = {0.5, 1};
+ std::vector<int64_t> weight_zeropoints = {0, 10};
+ int weight = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_INT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_Mul_Uint8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 2.0, 1);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.5, 2);
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
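+  // Expected outputs: dequantize as (q - zero_point) * scale, multiply, then requantize as
+  // q = real / 0.5 + 2. For example, the second element:
+  // (12 - 3) * 1.0 * (4 - 1) * 2.0 = 54 -> 54 / 0.5 + 2 = 110.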
+ _context->addTestCase(uniformTCD<uint8_t>({{3, 12, 5, 2}, {5, 4, 7, 0}}, {{2, 110, 50, 6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Mul_Int8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1.0, 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2.0, 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{-14, -34, -6, 2}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_MulBroadcast_Uint8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_UINT8}, 2.0, 1);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.5, 2);
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<uint8_t>({{3, 12, 5, 4}, {5}}, {{2, 146, 34, 18}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_MulBroadcast_Int8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1.0, 2);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2.0, 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5}}, {{-14, 2, -6, 10}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Mul_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Mul_InvalidShape)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Mul_OneOperand)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorMul({{in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Mul_ThreeOperands)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorMul({{in, in, in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
#include "GenModelTest.h"
-TEST_F(GenModelTest, OneOp_Pad)
+// Input shape: {1, 2, 2, 1}
+// Padding: {0, 0, 1, 1, 1, 1, 0, 0}
+// Output shape: {1, 4, 4, 1}
+struct PadParam
{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class PadVariation : public GenModelTest, public ::testing::WithParamInterface<PadParam>
+{
+};
+
+TEST_P(PadVariation, Test)
+{
+ auto ¶m = GetParam();
+
CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
uint32_t padding_buf = cgen.addBuffer(padding_data);
int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
cgen.addOperatorPad({{in, padding}, {out}});
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}}));
+ _context->addTestCase(param.tcd);
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
}
+// Test with different value types
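+// For quantized types the padded region is filled with the zero point, i.e. the quantized
+// representation of real value 0 (8 for the uint8 case, -5 for the int8 case).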
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, PadVariation,
+ ::testing::Values(
+ // float value
+ PadParam{uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})},
+ // uint8 value
+ PadParam{
+ uniformTCD<uint8_t>({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 8},
+ // int8 value
+ PadParam{uniformTCD<int8_t>({{-2, -1, 1, 2}},
+ {{-5, -5, -5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}),
+ circle::TensorType::TensorType_INT8, 1.0, -5}));
+
TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadRank)
{
CircleGen cgen;
SUCCEED();
}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_Type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_QuantParam)
+{
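+  // Pad is expected to require matching quantization parameters on input and output;
+  // the differing zero points (1 vs 3) should make model load fail.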
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1);
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
SUCCEED();
}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_Type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<uint8_t> padding_value_data{3};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_UINT8, padding_value_buf}, 1.0, 1);
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPadV2({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_QuantParam)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 2);
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<uint8_t> padding_value_data{3};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_UINT8, padding_value_buf}, 1.0, 1);
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1);
+
+ cgen.addOperatorPadV2({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleGen genSimpleQuantizeModel(circle::TensorType from_t, float input_scale, int input_zeropoint,
+ circle::TensorType to_t, float output_scale, int output_zeropoint)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 4, 4, 1}, from_t}, input_scale, input_zeropoint);
+ int out = cgen.addTensor({{1, 4, 4, 1}, to_t}, output_scale, output_zeropoint);
+ cgen.addOperatorQuantize({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen;
+}
+
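+// QUANTIZE re-quantizes values: q_out = round((q_in - in_zp) * in_scale / out_scale) + out_zp,
+// clamped to the output type range. For example, uint8 48 with (scale 1.0, zp 128) becomes
+// (48 - 128) * 1.0 / 2.0 + (-10) = -50 in int8 with (scale 2.0, zp -10).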
+TEST_F(GenModelTest, OneOp_Quantize_Uint8toInt8)
+{
+ CircleGen cgen =
+ genSimpleQuantizeModel(circle::TensorType_UINT8, 1., 128, circle::TensorType_INT8, 2., -10);
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<uint8_t>({127, 48, 151, 232, 56, 176, 47, 37, 51, 52, 39, 94, 15, 108, 142, 243})
+ .addOutput<int8_t>(
+ {-10, -50, 2, 42, -46, 14, -50, -55, -48, -48, -54, -27, -66, -20, -3, 48}));
+ _context->setBackends({"cpu"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Quantize_Int8toUint8)
+{
+ CircleGen cgen =
+ genSimpleQuantizeModel(circle::TensorType_INT8, 2., -10, circle::TensorType_UINT8, 1., 128);
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<int8_t>({-10, -50, 2, 42, -46, 14, -50, -55, -48, -48, -54, -27, -66, -20, -3, 48})
+ .addOutput<uint8_t>({128, 48, 152, 232, 56, 176, 48, 38, 52, 52, 40, 94, 16, 108, 142, 244}));
+ _context->setBackends({"cpu"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Quantize_Uint8toInt16)
+{
+ CircleGen cgen =
+ genSimpleQuantizeModel(circle::TensorType_UINT8, 1., 128, circle::TensorType_INT16, 2., -10);
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Quantize_Int8toInt16)
+{
+ CircleGen cgen =
+ genSimpleQuantizeModel(circle::TensorType_INT8, 2., -10, circle::TensorType_INT16, 1., 128);
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
#include <memory>
-TEST_F(GenModelTest, OneOp_ResizeBilinear_SizeToConst)
+struct ResizeBilinearParam
{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class ResizeBilinearVariation : public GenModelTest,
+ public ::testing::WithParamInterface<ResizeBilinearParam>
+{
+};
+
+TEST_P(ResizeBilinearVariation, Test)
+{
+  auto &param = GetParam();
+
CircleGen cgen;
std::vector<int32_t> size_data{3, 3};
uint32_t size_buf = cgen.addBuffer(size_data);
int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 3, 3, 1}, param.data_type}, param.scale, param.zero_point);
cgen.addOperatorResizeBilinear({{in, size}, {out}});
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- uniformTCD<float>({{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667, 1.666666667, 2, 2, 2}}));
+ _context->addTestCase(param.tcd);
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
}
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, ResizeBilinearVariation,
+ ::testing::Values(
+ // float value
+ ResizeBilinearParam{uniformTCD<float>({{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667,
+ 1.666666667, 2, 2, 2}})},
+ // uint8 value
+ ResizeBilinearParam{uniformTCD<uint8_t>({{3, 6, 9, 12}}, {{3, 5, 6, 7, 9, 10, 9, 11, 12}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 0},
+ // int8 value
+ ResizeBilinearParam{uniformTCD<int8_t>({{-6, -3, 9, 12}}, {{-6, -4, -3, 4, 6, 7, 9, 11, 12}}),
+ circle::TensorType::TensorType_INT8, 1.0, 0}));
+
TEST_F(GenModelTest, OneOp_ResizeBilinear_SizeToVar)
{
CircleGen cgen;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct SliceVariationParam
+{
+ std::vector<int32_t> input_shape;
+ std::vector<int32_t> begins;
+ std::vector<int32_t> sizes;
+ TestCaseData tcd;
+
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+ circle::TensorType begins_type = circle::TensorType::TensorType_INT32;
+};
+
+class SliceVariation : public GenModelTest,
+ public ::testing::WithParamInterface<SliceVariationParam>
+{
+};
+
+TEST_P(SliceVariation, Test)
+{
+  auto &param = GetParam();
+
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point);
+ if (param.begins_type == circle::TensorType::TensorType_INT32)
+ {
+ uint32_t begins_buf = cgen.addBuffer(param.begins);
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(param.sizes);
+ int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ else if (param.begins_type == circle::TensorType::TensorType_INT64)
+ {
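+    // param.begins/sizes are given as int32; copy them into int64 buffers
+    // to exercise the INT64 begins/sizes path.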
+ std::vector<int64_t> begins_64(param.begins.size());
+ std::vector<int64_t> sizes_64(param.sizes.size());
+ for (int i = 0; i < param.begins.size(); i++)
+ {
+ begins_64[i] = param.begins[i];
+ sizes_64[i] = param.sizes[i];
+ }
+
+ uint32_t begins_buf = cgen.addBuffer(begins_64);
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(sizes_64);
+ int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+
+  // ACL backends don't support int64 yet
+ if (param.begins_type == circle::TensorType::TensorType_INT64)
+ {
+ _context->setBackends({"cpu"});
+ }
+ else
+ {
+ _context->setBackends({"cpu", "acl_cl", "acl_neon"});
+ }
+
+ SUCCEED();
+}
+
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, SliceVariation,
+ ::testing::Values(
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})},
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<uint8_t>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
+ circle::TensorType::TensorType_UINT8,
+ 1,
+ 0},
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
+ circle::TensorType::TensorType_FLOAT32,
+ 0,
+ 0,
+ circle::TensorType::TensorType_INT64}));
+
+TEST_F(GenModelTest, neg_OneOp_Slice_Type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<float> begins_data = {0, 0, 1, 0};
+ uint32_t begins_buf = cgen.addBuffer(begins_data);
+ int begins = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, begins_buf});
+ std::vector<float> sizes_data = {1, 2, 1, 1};
+ uint32_t sizes_buf = cgen.addBuffer(sizes_data);
+ int sizes = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, sizes_buf});
+ int out = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Slice_DiffType)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> begins_data = {0, 0, 1, 0};
+ uint32_t begins_buf = cgen.addBuffer(begins_data);
+ int begins = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, begins_buf});
+ std::vector<int64_t> sizes_data = {1, 2, 1, 1};
+ uint32_t sizes_buf = cgen.addBuffer(sizes_data);
+ int sizes = cgen.addTensor({{4}, circle::TensorType::TensorType_INT64, sizes_buf});
+ int out = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+// beta = 0.1
+// input/output shape: {1, 2, 1, 4}
+struct SoftmaxParam
+{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float input_scale = 0.0f;
+ int64_t input_zero_point = 0;
+};
+
+class SoftmaxVariation : public GenModelTest, public ::testing::WithParamInterface<SoftmaxParam>
+{
+};
+
+TEST_P(SoftmaxVariation, Test)
+{
+  auto &param = GetParam();
+
+ CircleGen cgen;
+
+  // The NNAPI spec and TFLite tests use a fixed output scale and zero point
+ float out_scale = 0.0;
+ int64_t out_zero_point = 0;
+ if (param.data_type == circle::TensorType::TensorType_UINT8)
+ {
+ out_scale = 1.0f / 256;
+ }
+ else if (param.data_type == circle::TensorType::TensorType_INT8)
+ {
+ out_scale = 1.0f / 256;
+ out_zero_point = -128;
+ }
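+  // Softmax outputs lie in [0, 1), so a scale of 1/256 covers the full range;
+  // for INT8 the zero point of -128 places 0.0 at the type minimum.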
+
+ int input =
+ cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point);
+ int out = cgen.addTensor({{1, 2, 1, 4}, param.data_type}, out_scale, out_zero_point);
+ cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"cpu", "acl_neon", "acl_cl"});
+
+ SUCCEED();
+}
+
+// Test with different value types
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, SoftmaxVariation,
+ ::testing::Values(
+ // float value
+ SoftmaxParam{
+ uniformTCD<float>({{0, -6, 2, 4, 3, -2, 10, 1}},
+ {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})},
+ // uint8 value
+ SoftmaxParam{
+ uniformTCD<uint8_t>({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 10},
+ // int8 value
+ SoftmaxParam{
+ uniformTCD<int8_t>({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}),
+ circle::TensorType::TensorType_INT8, 1.0, 0}));
+
+TEST_F(GenModelTest, neg_OneOp_Softmax_Type)
+{
+ CircleGen cgen;
+ int input = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
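+// Expected outputs of the quantized Sub tests below can be derived as
+//   out_q = (lhs_scale * (lhs_q - lhs_zp) - rhs_scale * (rhs_q - rhs_zp)) / out_scale + out_zp
+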
+TEST_F(GenModelTest, OneOp_Sub_Uint8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 2.0, 1);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.5, 2);
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<uint8_t>({{13, 12, 25, 40}, {5, 4, 7, 0}}, {{6, 8, 22, 80}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Sub_Int8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1.0, 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2.0, 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{-16, 24, 34, -6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_SubBroadcast_Uint8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_UINT8}, 2.0, 1);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.5, 2);
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<uint8_t>({{13, 12, 25, 40}, {5}}, {{6, 4, 30, 60}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_SubBroadcast_Int8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1.0, 2);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2.0, 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5}}, {{-16, -12, -14, -10}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Sub_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Sub_InvalidShape)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Sub_OneOperand)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSub({{in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Sub_ThreeOperands)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSub({{in, in, in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
*/
#include "GenModelTest.h"
+#include "WhileTestModel.h"
#include <memory>
TEST_F(GenModelTest, OneOp_While)
{
- // The model looks just like the below pseudocode
- //
- // function model(x)
- // {
- // while (x < 100.0)
- // {
- // x = x + 10.0;
- // }
- // return x
- // }
-
- CircleGen cgen;
- std::vector<float> incr_data{10};
- uint32_t incr_buf = cgen.addBuffer(incr_data);
- std::vector<float> end_data{100};
- uint32_t end_buf = cgen.addBuffer(end_data);
-
- // primary subgraph
- {
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorWhile({{x_in}, {x_out}}, 1, 2);
- cgen.setInputsAndOutputs({x_in}, {x_out});
- }
-
- // cond subgraph
- {
- cgen.nextSubgraph();
- int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
- int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
- cgen.addOperatorLess({{x, end}, {result}});
- cgen.setInputsAndOutputs({x}, {result});
- }
-
- // body subgraph
- {
- cgen.nextSubgraph();
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({x_in}, {x_out});
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ WhileModelLoop10 model;
+ _context = std::make_unique<GenModelTestContext>(std::move(model.cbuf));
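+  // The body subgraph adds 10 until x reaches at least 100, so 0 -> 100 and 2 or 22 -> 102.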
_context->addTestCase(uniformTCD<float>({{0}}, {{100}}));
_context->addTestCase(uniformTCD<float>({{2}}, {{102}}));
_context->addTestCase(uniformTCD<float>({{22}}, {{102}}));
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_API_TEST_WHILE_TEST_MODEL_H__
+#define __NNFW_API_TEST_WHILE_TEST_MODEL_H__
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+class WhileModelLoop10
+{
+public:
+ WhileModelLoop10()
+ {
+ // The model looks just like the below pseudocode
+ //
+ // function model(x)
+ // {
+ // while (x < 100.0)
+ // {
+ // x = x + 10.0;
+ // }
+ // return x
+ // }
+ CircleGen cgen;
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+ std::vector<float> end_data{100};
+ uint32_t end_buf = cgen.addBuffer(end_data);
+
+ // primary subgraph
+ {
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorWhile({{x_in}, {x_out}}, 1, 2);
+ cgen.setInputsAndOutputs({x_in}, {x_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in}, {x_out});
+ }
+ cbuf = cgen.finish();
+ }
+
+ int inputCount() { return 1; }
+  int outputCount() { return 1; }
+ int sizeOfDType() { return sizeof(float); }
+
+ CircleBuffer cbuf;
+};
+
+#endif // __NNFW_API_TEST_WHILE_TEST_MODEL_H__
file(GLOB TFLITE_CONFIG_DIR models/tflite)
install(DIRECTORY ${TFLITE_CONFIG_DIR} DESTINATION test/models)
-# Install nnpackage test config
-file(GLOB NNPACKAGE_MODEL_CONFIG_DIR models/nnfw_api_gtest)
-install(DIRECTORY ${NNPACKAGE_MODEL_CONFIG_DIR} DESTINATION test/models)
-
# Install test list
file(GLOB TEST_LIST_DIR list)
install(DIRECTORY ${TEST_LIST_DIR} DESTINATION test)
$BRIDGE shell rm $TEST_ROOT/nnpkg.tar.gz
# 1. Run
-$BRIDGE shell LD_LIBRARY_PATH=$TEST_ROOT/Product/out/lib OP_SEQ_MAX_NODE=1 TRACE_FILEPATH=$TEST_ROOT/trace.json BACKENDS=$BACKENDS $TEST_ROOT/Product/out/bin/nnpackage_run --nnpackage $NNPKG_PATH_TARGET -r $NUM_RUNS
+$BRIDGE shell LD_LIBRARY_PATH=$TEST_ROOT/Product/out/lib TRACE_FILEPATH=$TEST_ROOT/trace.json BACKENDS=$BACKENDS $TEST_ROOT/Product/out/bin/nnpackage_run --nnpackage $NNPKG_PATH_TARGET -r $NUM_RUNS
# 2. Pull result file
echo "Pulling data from target to trace.json"
INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
MD5_CHECK="on"
-DOWNLOAD_MODEL="all"
function Usage()
{
echo ""
echo "Options:"
echo " --ignoremd5 Ignore MD5 check when download model files"
- echo " --model=(all|nnpackage|tflite) Download test model (default=all)"
+  echo "       --model=(all|nnpackage|tflite) Download test model (deprecated; all models are always downloaded)"
}
for i in "$@"
MD5_CHECK="off"
;;
--model=*)
- DOWNLOAD_MODEL=${i#*=}
+ # deprecated
;;
*)
echo "Unknown option: $i"
fi
echo "Download from $MODELFILE_SERVER"
-if [[ $DOWNLOAD_MODEL == "all" ]] || [[ $DOWNLOAD_MODEL == "tflite" ]]; then
- # Download tflite models
- $INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK
-fi
-
-if [[ $DOWNLOAD_MODEL == "all" ]] || [[ $DOWNLOAD_MODEL == "nnpackage" ]]; then
- # Download nnpackage model
- NNPACKAGE_CONFIG_DIR=$INSTALL_DIR/test/models/nnfw_api_gtest/
- NNPACKAGE_CACHE_DIR=$INSTALL_DIR/unittest_standalone/nnfw_api_gtest_models/
- $INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK \
- --configdir=$NNPACKAGE_CONFIG_DIR --cachedir=$NNPACKAGE_CACHE_DIR
-fi
+$INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK
INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
MD5_CHECK="on"
-TFLITE_LOADER="nnapi"
+TFLITE_LOADER="loader"
REPORT_DIR="report"
TEST_LIST_FILE=
TEST_DRIVER=nnapi_test
elif [[ $TFLITE_LOADER == "loader" ]]; then
TEST_NAME="Loader Verification"
- TEST_DRIVER=tflite_loader_test_tool
+ TEST_DRIVER=tflite_comparator
else
Usage
exit 1
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-cast
-concat
-conv_2d
-custom
-depthwise_conv_2d
-div
-embedding_lookup
-exp
-floor
-fullyconnected
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-max
-max_pool_2d
-mean
-min
-mul
-neg
-pack
-pad
-reduce_max
-reduce_mean
-reduce_sum/float
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-space_to_depth
-squeeze
-strided_slice
-sub
-tanh
-topk_v2
-transpose
-transpose_conv
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-embedding_lookup
-floor
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-logistic
-max
-max_pool_2d
-mean
-min
-mul
-one_hot
-pack
-pad
-reduce_max
-reduce_mean
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_depth
-sqrt
-squeeze
-strided_slice
-sub
-tanh
-transpose
-transpose_conv
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-equal
-exp
-fullyconnected
-greater
-greater_equal
-less
-less_equal
-logistic
-max
-max_pool_2d
-min
-mul
-neg
-not_equal
-one_hot
-pack
-reduce_max
-reduce_sum
-reshape/reshape1
-select
-softmax
-squeeze
-sub
-tanh
-tile
-transpose
-zeros_like
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-cast
-concat
-conv_2d
-custom
-depthwise_conv_2d
-div
-embedding_lookup
-exp
-floor
-fullyconnected
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-max
-max_pool_2d
-mean
-min
-mul
-neg
-pack
-pad
-reduce_max
-reduce_mean
-reduce_sum/float
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-space_to_depth
-squeeze
-strided_slice
-sub
-tanh
-topk_v2
-transpose
-transpose_conv
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-embedding_lookup
-floor
-fullyconnected
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-logistic
-max
-max_pool_2d
-mean
-min
-mul
-one_hot
-pack
-pad
-reduce_max
-reduce_mean
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_depth
-sqrt
-squeeze
-strided_slice
-sub
-tanh
-transpose
-transpose_conv
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-MODELS/mobilenet_quant8
-abs
-add
-average_pool_2d
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-equal
-exp
-fullyconnected
-greater
-greater_equal
-less
-less_equal
-logistic
-max
-max_pool_2d
-mean
-min
-mul
-neg
-not_equal
-one_hot
-pack
-reduce_max
-reduce_sum
-reshape/reshape1
-rsqrt
-select
-shape
-sin
-slice
-strided_slice
-softmax
-squeeze
-sub
-tanh
-tile
-transpose
-zeros_like
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-concat
-conv_2d
-depthwise_conv_2d
-fullyconnected/fc1
-logistic
-max_pool_2d
-pad
-relu
-relu6
-reshape/reshape1
-softmax
-tanh
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-MODELS/mobilenet_quant8
-add
-average_pool_2d
-concat
-conv_2d
-depthwise_conv_2d
-fullyconnected
-logistic
-max_pool_2d
-mean
-reduce_max
-reduce_sum
-reshape/reshape1
-select
-softmax
-squeeze
-tile
-transpose
-zeros_like
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+batch_to_space_nd2
+cast
+concat
+conv_2d
+custom
+depthwise_conv_2d
+div
+embedding_lookup
+exp
+floor
+fullyconnected
+gather
+hashtable_lookup
+l2_normalization
+l2_pool_2d
+max
+max_pool_2d
+mean
+min
+mul
+neg
+pack
+pad
+reduce_max
+reduce_mean
+reduce_sum/float
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_batch_nd2
+space_to_depth
+squeeze
+strided_slice
+sub
+tanh
+topk_v2
+transpose
+transpose_conv
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+batch_to_space_nd2
+cast
+concat
+conv_2d
+custom
+depthwise_conv_2d
+div
+embedding_lookup
+exp
+floor
+fullyconnected
+gather
+hashtable_lookup
+l2_normalization
+l2_pool_2d
+max
+max_pool_2d
+mean
+min
+mul
+neg
+pack
+pad
+reduce_max
+reduce_mean
+reduce_sum/float
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_batch_nd2
+space_to_depth
+squeeze
+strided_slice
+sub
+tanh
+topk_v2
+transpose
+transpose_conv
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+batch_to_space_nd2
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+exp
+floor
+fullyconnected
+gather
+l2_normalization
+max
+max_pool_2d
+mean
+min
+mul
+neg
+pack
+pad
+reduce_max
+reduce_mean
+reduce_sum/float
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_batch_nd2
+space_to_depth
+squeeze
+strided_slice
+sub
+tanh
+transpose
+transpose_conv
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+floor
+gather
+l2_normalization
+logistic
+max
+max_pool_2d
+mean
+min
+mul
+one_hot
+pack
+pad
+reduce_max
+reduce_mean
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_depth
+sqrt
+squeeze
+strided_slice
+sub
+tanh
+transpose
+transpose_conv
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+equal
+exp
+fullyconnected
+greater
+greater_equal
+less
+less_equal
+logistic
+max
+max_pool_2d
+min
+mul
+neg
+not_equal
+one_hot
+pack
+reduce_max
+reduce_sum
+reshape/reshape1
+select
+softmax
+squeeze
+sub
+tanh
+tile
+transpose
+zeros_like
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+batch_to_space_nd2
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+exp
+floor
+fullyconnected
+gather
+l2_normalization
+max
+max_pool_2d
+mean
+min
+mul
+neg
+pack
+pad
+reduce_max
+reduce_mean
+reduce_sum/float
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_batch_nd2
+space_to_depth
+squeeze
+strided_slice
+sub
+tanh
+transpose
+transpose_conv
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+floor
+fullyconnected
+gather
+l2_normalization
+logistic
+max
+max_pool_2d
+mean
+min
+mul
+one_hot
+pack
+pad
+reduce_max
+reduce_mean
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_depth
+sqrt
+squeeze
+strided_slice
+sub
+tanh
+transpose
+transpose_conv
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+MODELS/mobilenet_quant8
+abs
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+equal
+exp
+fullyconnected
+greater
+greater_equal
+less
+less_equal
+logistic
+max
+max_pool_2d
+mean
+min
+mul
+neg
+not_equal
+one_hot
+pack
+reduce_max
+reduce_sum
+reshape/reshape1
+rsqrt
+select
+shape
+sin
+slice
+strided_slice
+softmax
+squeeze
+sub
+tanh
+tile
+transpose
+zeros_like
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+concat
+conv_2d
+depthwise_conv_2d
+fullyconnected/fc1
+logistic
+max_pool_2d
+pad
+relu
+relu6
+reshape/reshape1
+softmax
+tanh
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+MODELS/mobilenet_quant8
+add
+average_pool_2d
+concat
+conv_2d
+depthwise_conv_2d
+fullyconnected
+logistic
+max_pool_2d
+mean
+reduce_max
+reduce_sum
+reshape/reshape1
+select
+softmax
+squeeze
+tile
+transpose
+zeros_like
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-concat
-conv_2d/convolution1
-depthwise_conv_2d
-div
-exp
-fullyconnected/fc1
-logistic
-max
-max_pool_2d/maxpool1
-mean
-min
-mul
-pack
-pad
-reduce_max
-reduce_sum/float
-relu
-relu6
-reshape/reshape1
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-sqrt
-squeeze
-sub
-tanh
-transpose
-transpose_conv
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-concat
-conv_2d/convolution1
-depthwise_conv_2d
-div
-exp
-fullyconnected/fc1
-logistic
-max
-max_pool_2d/maxpool1
-mean
-min
-mul
-pack
-pad
-reduce_max
-reduce_sum/float
-relu
-relu6
-reshape/reshape1
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-sqrt
-squeeze
-sub
-tanh
-transpose
-transpose_conv
+++ /dev/null
-MODELFILE_NAME="add.zip"
+++ /dev/null
-MODELFILE_NAME="add_invalid_manifest.zip"
+++ /dev/null
-MODELFILE_NAME="add_no_manifest.zip"
+++ /dev/null
-MODELFILE_NAME="if_dynamic.zip"
+++ /dev/null
-MODELFILE_NAME="while_dynamic.zip"
private:
void *data_;
};
-} // end of namespace
+} // namespace nnpkg_run
#endif // __NNPACKAGE_RUN_ALLOCATION_H__
throw std::runtime_error(
"model input type is qasymm8, bool or uint8. But h5 data type is different.");
break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ if (type == H5::PredType::STD_I8BE || type == H5::PredType::STD_I8LE)
+ data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT8);
+ else
+ throw std::runtime_error("model input type is int8. But h5 data type is different.");
+ break;
default:
throw std::runtime_error("nnpkg_run can load f32, i32, qasymm8, bool and uint8.");
}
data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
break;
}
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ {
+ H5::DataSet data_set =
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_I8LE, data_space);
+ data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
+ break;
+ }
default:
throw std::runtime_error("nnpkg_run can dump f32, i32, qasymm8, bool and uint8.");
}
private:
nnfw_session *session_;
};
-} // end of namespace
+} // namespace nnpkg_run
#endif // __NNPACKAGE_RUN_H5FORMATTER_H__
sizeof(bool), /* NNFW_TYPE_TENSOR_BOOL = 3 */
sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */
sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */
-
+ sizeof(int8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6 */
};
return elmsize[ti->dtype] * num_elems(ti);
}
-} // end of namespace
+} // namespace nnpkg_run
nnfw_tensorinfo ti;
NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
- if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
+ if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED)
{
std::cerr << "E: not supported input type" << std::endl;
exit(-1);
nnfw_tensorinfo ti;
NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
- if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
+ if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED)
{
std::cerr << "E: not supported output type" << std::endl;
exit(-1);
private:
nnfw_session *session_;
};
-} // end of namespace
+} // namespace nnpkg_run
#endif // __NNPACKAGE_RUN_RANDOMGEN_H__
--- /dev/null
+if(NOT BUILD_TFLITE_COMPARATOR_TEST_TOOL)
+ message("skipping tflite comparator tool build")
+ return()
+endif(NOT BUILD_TFLITE_COMPARATOR_TEST_TOOL)
+
+if(NOT BUILD_ONERT)
+ message("skipping tflite comparator tool build: onert is not built")
+ return()
+endif(NOT BUILD_ONERT)
+
+list(APPEND SOURCES "src/tflite_comparator.cc")
+list(APPEND SOURCES "src/args.cc")
+
+nnfw_find_package(Boost REQUIRED program_options system filesystem)
+
+add_executable(tflite_comparator ${SOURCES})
+target_include_directories(tflite_comparator PRIVATE ${Boost_INCLUDE_DIRS})
+
+target_link_libraries(tflite_comparator nnfw-dev)
+target_link_libraries(tflite_comparator nnfw_lib_tflite nnfw_lib_misc)
+target_link_libraries(tflite_comparator ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
+
+install(TARGETS tflite_comparator DESTINATION bin)
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <iostream>
+
+#include <boost/filesystem.hpp>
+
+namespace TFLiteRun
+{
+
+Args::Args(const int argc, char **argv) noexcept
+{
+ Initialize();
+ Parse(argc, argv);
+}
+
+void Args::Initialize(void)
+{
+ // General options
+ po::options_description general("General options");
+
+ // clang-format off
+ general.add_options()
+ ("help,h", "Display available options")
+ ("tflite", po::value<std::string>()->default_value("")->required(), "Input tflite model file for serialization")
+ ("data,d", po::value<std::vector<std::string>>()->multitoken()->default_value(std::vector<std::string>{}, ""), "Input data file for model");
+ // clang-format on
+
+ _options.add(general);
+ _positional.add("tflite", 1);
+}
+
+void Args::print(char **argv)
+{
+ std::cout << "tflite_comparator" << std::endl << std::endl;
+ std::cout << "Load tflite model by onert and TFLite, and compare their output" << std::endl;
+ std::cout << "Usage:" << std::endl;
+ std::cout << argv[0] << " --tflite model_file.tflite --data input_data.dat" << std::endl;
+ std::cout << _options;
+ std::cout << std::endl;
+}
+
+void Args::Parse(const int argc, char **argv)
+{
+ po::variables_map vm;
+ po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
+ vm);
+ po::notify(vm);
+
+ if (vm.count("help"))
+ {
+ print(argv);
+
+ exit(0);
+ }
+
+ try
+ {
+ if (vm.count("tflite"))
+ {
+ _tflite_filename = vm["tflite"].as<std::string>();
+ }
+
+ if (vm.count("data"))
+ {
+ _data_filenames = vm["data"].as<std::vector<std::string>>();
+ }
+ }
+ catch (const std::bad_cast &e)
+ {
+ std::cerr << e.what() << '\n';
+ print(argv);
+ exit(1);
+ }
+}
+
+} // namespace TFLiteRun
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_LOADER_TOOLS_SRC_ARGS_H__
+#define __TFLITE_LOADER_TOOLS_SRC_ARGS_H__
+
+#include <string>
+#include <boost/program_options.hpp>
+
+namespace po = boost::program_options;
+
+namespace TFLiteRun
+{
+
+class Args
+{
+public:
+ Args(const int argc, char **argv) noexcept;
+ void print(char **argv);
+
+ const std::string &getTFLiteFilename(void) const { return _tflite_filename; }
+ const std::vector<std::string> &getDataFilenames(void) const { return _data_filenames; }
+
+private:
+ void Initialize();
+ void Parse(const int argc, char **argv);
+
+private:
+ po::options_description _options;
+ po::positional_options_description _positional;
+
+ std::string _tflite_filename;
+ std::vector<std::string> _data_filenames;
+};
+
+} // namespace TFLiteRun
+
+#endif // __TFLITE_LOADER_TOOLS_SRC_ARGS_H__
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <nnfw_experimental.h>
+#include <nnfw_internal.h>
+
+#include <misc/EnvVar.h>
+#include <misc/fp32.h>
+#include <misc/RandomGenerator.h>
+
+#include <tflite/Assert.h>
+#include <tflite/InterpreterSession.h>
+#include <tflite/ext/kernels/register.h>
+
+#include <iostream>
+#include <fstream>
+#include <memory>
+
+const int RUN_FAILED = 1;
+
+using namespace tflite;
+using namespace nnfw::tflite;
+
+const int FILE_ERROR = 2;
+
+#define NNFW_ASSERT_FAIL(expr, msg) \
+ if ((expr) != NNFW_STATUS_NO_ERROR) \
+ { \
+ std::cerr << msg << std::endl; \
+ exit(-1); \
+ }
+
+// Read raw bytes from the selected file into the destination buffer
+void readData(const std::string &path, std::vector<uint8_t> &dest)
+{
+ std::ifstream in(path);
+ if (!in.good())
+ {
+ std::cerr << "can not open data file " << path << "\n";
+ exit(FILE_ERROR);
+ }
+ in.seekg(0, std::ifstream::end);
+ size_t len = in.tellg();
+ in.seekg(0, std::ifstream::beg);
+
+ assert(dest.size() == len);
+ in.read(reinterpret_cast<char *>(dest.data()), len);
+}
+
+template <typename T>
+void randomData(nnfw::misc::RandomGenerator &randgen, std::vector<uint8_t> &dest)
+{
+ size_t elements = dest.size() / sizeof(T);
+ assert(dest.size() % sizeof(T) == 0);
+
+ std::vector<T> vec(elements);
+ for (uint64_t i = 0; i < elements; i++)
+ {
+ vec[i] = randgen.generate<T>();
+ }
+ memcpy(dest.data(), vec.data(), elements * sizeof(T));
+}
+
+void randomBoolData(nnfw::misc::RandomGenerator &randgen, std::vector<uint8_t> &dest)
+{
+ size_t elements = dest.size();
+ std::vector<uint8_t> vec(elements);
+ for (uint64_t i = 0; i < elements; i++)
+ {
+ bool value = randgen.generate<bool>();
+ dest[i] = value ? 1 : 0;
+ }
+}
+
+inline uint64_t num_elems(const nnfw_tensorinfo *ti)
+{
+ uint64_t n = 1;
+ for (uint32_t i = 0; i < ti->rank; ++i)
+ {
+ n *= ti->dims[i];
+ }
+ return n;
+}
+
+inline size_t sizeOfNnfwType(NNFW_TYPE type)
+{
+ switch (type)
+ {
+ case NNFW_TYPE_TENSOR_BOOL:
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ return 1;
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ case NNFW_TYPE_TENSOR_INT32:
+ return 4;
+ case NNFW_TYPE_TENSOR_INT64:
+ return 8;
+ default:
+ throw std::runtime_error{"Invalid tensor type"};
+ }
+}
+
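+// Element-wise exact comparison of a reference buffer against an actual output buffer;
+// logs every mismatching element and returns true only if all elements match.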
+template <typename T>
+bool compareBuffersExact(const T *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
+{
+ bool match = true;
+ for (uint32_t e = 0; e < act_buf.size() / sizeof(T); e++)
+ {
+ T ref = ref_buf[e];
+ T act = reinterpret_cast<const T *>(act_buf.data())[e];
+
+ if (ref != act)
+ {
+ std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
+ << ", act: " << act << std::endl;
+ match = false;
+ }
+ }
+
+ return match;
+}
+
+bool compareBuffersExactBool(const uint8_t *ref_buf, const std::vector<uint8_t> &act_buf,
+ uint32_t index)
+{
+ bool match = true;
+ for (uint32_t e = 0; e < act_buf.size() / sizeof(uint8_t); e++)
+ {
+ uint8_t ref_raw = ref_buf[e];
+ bool ref = (ref_raw != 0 ? true : false);
+ uint8_t act_raw = reinterpret_cast<const uint8_t *>(act_buf.data())[e];
+ bool act = (act_raw != 0 ? true : false);
+ if (ref != act)
+ {
+ std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
+ << ", act: " << act << std::endl;
+ match = false;
+ }
+ }
+
+ return match;
+}
+
+int main(const int argc, char **argv)
+{
+ TFLiteRun::Args args(argc, argv);
+
+ auto tflite_file = args.getTFLiteFilename();
+ auto data_files = args.getDataFilenames();
+
+ if (tflite_file.empty())
+ {
+ args.print(argv);
+ return RUN_FAILED;
+ }
+
+ std::cout << "[Execution] Stage start!" << std::endl;
+ // Loading
+ nnfw_session *onert_session = nullptr;
+ NNFW_ASSERT_FAIL(nnfw_create_session(&onert_session), "[ ERROR ] Failure during model load");
+ if (onert_session == nullptr)
+ {
+ std::cerr << "[ ERROR ] Failure to open session" << std::endl;
+ exit(-1);
+ }
+
+ NNFW_ASSERT_FAIL(nnfw_load_model_from_modelfile(onert_session, tflite_file.c_str()),
+ "[ ERROR ] Failure during model load");
+
+ uint32_t num_inputs;
+ uint32_t num_outputs;
+ NNFW_ASSERT_FAIL(nnfw_input_size(onert_session, &num_inputs),
+ "[ ERROR ] Failure during get model inputs");
+ NNFW_ASSERT_FAIL(nnfw_output_size(onert_session, &num_outputs),
+ "[ ERROR ] Failure during get model outputs");
+
+ std::cout << "[Execution] Model is deserialized!" << std::endl;
+
+ // Compile
+ nnfw_prepare(onert_session);
+
+ std::cout << "[Execution] Model compiled!" << std::endl;
+
+ // Prepare input/output data
+ std::vector<std::vector<uint8_t>> inputs(num_inputs);
+ std::vector<std::vector<uint8_t>> outputs(num_outputs);
+
+ bool generate_data = data_files.empty();
+ bool read_data = data_files.size() == num_inputs;
+ if (!generate_data && !read_data)
+ {
+ std::cerr << "[ ERROR ] "
+ << "Wrong number of input files." << std::endl;
+ exit(1);
+ }
+
+ const int seed = 1; /* TODO Add an option for seed value */
+ nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
+
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ nnfw_tensorinfo ti_input;
+ NNFW_ASSERT_FAIL(nnfw_input_tensorinfo(onert_session, i, &ti_input),
+ "[ ERROR ] Failure during get input data info");
+ size_t input_size = num_elems(&ti_input) * sizeOfNnfwType(ti_input.dtype);
+
+ inputs[i].resize(input_size);
+
+ if (generate_data)
+ {
+ switch (ti_input.dtype)
+ {
+ case NNFW_TYPE_TENSOR_BOOL:
+ randomBoolData(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ randomData<uint8_t>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ randomData<int8_t>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ randomData<float>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_INT32:
+ randomData<int32_t>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_INT64:
+ randomData<uint64_t>(randgen, inputs[i]);
+ break;
+ default:
+ std::cerr << "[ ERROR ] "
+                  << "Unsupported input data type" << std::endl;
+ exit(-1);
+ break;
+ }
+ }
+ else /* read_data */
+ readData(data_files[i], inputs[i]);
+
+ NNFW_ASSERT_FAIL(nnfw_set_input(onert_session, i, ti_input.dtype, inputs[i].data(), input_size),
+ "[ ERROR ] Failure to set input tensor buffer");
+ }
+
+ std::cout << "[Execution] Input data is defined!" << std::endl;
+
+ for (uint32_t i = 0; i < num_outputs; i++)
+ {
+ nnfw_tensorinfo ti_output;
+ NNFW_ASSERT_FAIL(nnfw_output_tensorinfo(onert_session, i, &ti_output),
+ "[ ERROR ] Failure during get output tensor info");
+
+ uint64_t output_elements = num_elems(&ti_output);
+ size_t output_size = output_elements * sizeOfNnfwType(ti_output.dtype);
+ outputs[i].resize(output_size);
+
+ NNFW_ASSERT_FAIL(
+ nnfw_set_output(onert_session, i, ti_output.dtype, outputs[i].data(), output_size),
+ "[ ERROR ] Failure to set output tensor buffer");
+ }
+
+ // Execute
+ NNFW_ASSERT_FAIL(nnfw_run(onert_session), "[Execution] Can't execute");
+
+ std::cout << "[Execution] Done!" << std::endl;
+
+ // Compare with tflite
+ std::cout << "[Comparison] Stage start!" << std::endl;
+ // Read tflite model
+ StderrReporter error_reporter;
+ auto model = FlatBufferModel::BuildFromFile(tflite_file.c_str(), &error_reporter);
+
+ BuiltinOpResolver resolver;
+ InterpreterBuilder builder(*model, resolver);
+
+ std::unique_ptr<Interpreter> interpreter;
+ try
+ {
+ TFLITE_ENSURE(builder(&interpreter));
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ exit(FILE_ERROR);
+ }
+ interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(1));
+
+ auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
+ sess->prepare();
+ // Set input and run
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ auto input_tensor = interpreter->tensor(interpreter->inputs().at(i));
+ memcpy(input_tensor->data.uint8, inputs[i].data(), inputs[i].size());
+ }
+ if (!sess->run())
+ {
+ std::cout << "[Comparison] TFLite run failed!" << std::endl;
+ assert(0 && "Run failed!");
+ }
+ std::cout << "[Comparison] TFLite run done!" << std::endl;
+
+ // Calculate max difference over all outputs
+ float max_float_difference = 0.0f;
+ bool find_unmatched_output = false;
+ auto tolerance = nnfw::misc::EnvVar("TOLERANCE").asInt(1);
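+  // TOLERANCE (default 1) relaxes the relative-epsilon comparison applied to
+  // FLOAT32 outputs below.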
+
+ for (uint32_t out_idx = 0; out_idx < num_outputs; out_idx++)
+ {
+ nnfw_tensorinfo ti;
+ nnfw_output_tensorinfo(onert_session, out_idx, &ti);
+
+ bool matched = true;
+ // Check output tensor values
+
+ const auto &ref_output = interpreter->tensor(interpreter->outputs().at(out_idx))->data;
+ const auto &output = outputs[out_idx];
+
+ switch (ti.dtype)
+ {
+ case NNFW_TYPE_TENSOR_BOOL:
+ matched = compareBuffersExactBool(ref_output.uint8, output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ matched = compareBuffersExact<uint8_t>(ref_output.uint8, output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ matched = compareBuffersExact<int8_t>(ref_output.int8, output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_INT32:
+ matched = compareBuffersExact<int32_t>(ref_output.i32, output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ // TODO better way for handling FP error?
+ for (uint32_t e = 0; e < num_elems(&ti); e++)
+ {
+ float refval = ref_output.f[e];
+ float val = reinterpret_cast<const float *>(output.data())[e];
+ if (std::abs(refval - val) > max_float_difference)
+ max_float_difference = std::abs(refval - val);
+
+          if (!nnfw::misc::fp32::absolute_epsilon_equal(refval, val) &&
+              !nnfw::misc::fp32::epsilon_equal(refval, val, tolerance))
+            matched = false;
+ }
+ break;
+ case NNFW_TYPE_TENSOR_INT64:
+ matched = compareBuffersExact<int64_t>(ref_output.i64, output, out_idx);
+ break;
+ default:
+ throw std::runtime_error{"Invalid tensor type"};
+ }
+
+ if (!matched)
+ find_unmatched_output = true;
+ }
+
+ // Print results
+ std::cout << "[Comparison] Max float difference: " << max_float_difference << std::endl;
+ int ret = 0;
+ if (find_unmatched_output)
+ {
+    std::cout << "[Comparison] Outputs are not equal!" << std::endl;
+ ret = 1;
+ }
+ else
+ {
+    std::cout << "[Comparison] Outputs are equal!" << std::endl;
+ }
+ std::cout << "[Comparison] Done!" << std::endl;
+
+ nnfw_close_session(onert_session);
+
+ return ret;
+}
+++ /dev/null
-if(NOT BUILD_TFLITE_LOADER_TEST_TOOL)
- message("skipping tflite loader tool build")
- return()
-endif(NOT BUILD_TFLITE_LOADER_TEST_TOOL)
-
-if(NOT BUILD_ONERT)
- message("skipping tflite loader tool build: onert is not built")
- return()
-endif(NOT BUILD_ONERT)
-
-list(APPEND SOURCES "src/tflite_loader.cc")
-list(APPEND SOURCES "src/args.cc")
-
-nnfw_find_package(Boost REQUIRED program_options system filesystem)
-
-add_executable(tflite_loader_test_tool ${SOURCES})
-target_include_directories(tflite_loader_test_tool PRIVATE ${Boost_INCLUDE_DIRS})
-
-target_link_libraries(tflite_loader_test_tool nnfw-dev)
-target_link_libraries(tflite_loader_test_tool nnfw_lib_tflite nnfw_lib_misc)
-target_link_libraries(tflite_loader_test_tool ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
-
-install(TARGETS tflite_loader_test_tool DESTINATION bin)
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "args.h"
-
-#include <iostream>
-
-#include <boost/filesystem.hpp>
-
-namespace TFLiteRun
-{
-
-Args::Args(const int argc, char **argv) noexcept
-{
- Initialize();
- Parse(argc, argv);
-}
-
-void Args::Initialize(void)
-{
- // General options
- po::options_description general("General options");
-
- // clang-format off
- general.add_options()
- ("help,h", "Display available options")
- ("tflite", po::value<std::string>()->default_value("")->required(), "Input tflite model file for serialization")
- ("data,d", po::value<std::vector<std::string>>()->multitoken()->default_value(std::vector<std::string>{}, ""), "Input data file for model");
- // clang-format on
-
- _options.add(general);
- _positional.add("tflite", 1);
-}
-
-void Args::print(char **argv)
-{
- std::cout << "tflite_loader" << std::endl << std::endl;
- std::cout << "Load tflite model by Loader and TFLite and compare their output" << std::endl;
- std::cout << "Usage:" << std::endl;
- std::cout << argv[0] << " --tflite model_file.tflite --data input_data.dat" << std::endl;
- std::cout << _options;
- std::cout << std::endl;
-}
-
-void Args::Parse(const int argc, char **argv)
-{
- po::variables_map vm;
- po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
- vm);
- po::notify(vm);
-
- if (vm.count("help"))
- {
- print(argv);
-
- exit(0);
- }
-
- try
- {
- if (vm.count("tflite"))
- {
- _tflite_filename = vm["tflite"].as<std::string>();
- }
-
- if (vm.count("data"))
- {
- _data_filenames = vm["data"].as<std::vector<std::string>>();
- }
- }
- catch (const std::bad_cast &e)
- {
- std::cerr << e.what() << '\n';
- print(argv);
- exit(1);
- }
-}
-
-} // end of namespace TFLiteRun
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __TFLITE_LOADER_TOOLS_SRC_ARGS_H__
-#define __TFLITE_LOADER_TOOLS_SRC_ARGS_H__
-
-#include <string>
-#include <boost/program_options.hpp>
-
-namespace po = boost::program_options;
-
-namespace TFLiteRun
-{
-
-class Args
-{
-public:
- Args(const int argc, char **argv) noexcept;
- void print(char **argv);
-
- const std::string &getTFLiteFilename(void) const { return _tflite_filename; }
- const std::vector<std::string> &getDataFilenames(void) const { return _data_filenames; }
-
-private:
- void Initialize();
- void Parse(const int argc, char **argv);
-
-private:
- po::options_description _options;
- po::positional_options_description _positional;
-
- std::string _tflite_filename;
- std::vector<std::string> _data_filenames;
-};
-
-} // namespace TFLiteRun
-
-#endif // __TFLITE_LOADER_TOOLS_SRC_ARGS_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "args.h"
-
-#include <nnfw_experimental.h>
-#include <nnfw_internal.h>
-
-#include <misc/EnvVar.h>
-#include <misc/RandomGenerator.h>
-
-#include <tflite/Assert.h>
-#include <tflite/InterpreterSession.h>
-#include <tflite/ext/kernels/register.h>
-
-#include <iostream>
-#include <fstream>
-#include <memory>
-
-const int RUN_FAILED = 1;
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-const int FILE_ERROR = 2;
-const float DIFFERENCE_THRESHOLD = 10e-5;
-
-#define NNFW_ASSERT_FAIL(expr, msg) \
- if ((expr) != NNFW_STATUS_NO_ERROR) \
- { \
- std::cerr << msg << std::endl; \
- exit(-1); \
- }
-
-// Read vector of floats from selected file
-void readData(const string &path, std::vector<uint8_t> &dest)
-{
- std::ifstream in(path);
- if (!in.good())
- {
- std::cerr << "can not open data file " << path << "\n";
- exit(FILE_ERROR);
- }
- in.seekg(0, std::ifstream::end);
- size_t len = in.tellg();
- in.seekg(0, std::ifstream::beg);
-
- assert(dest.size() == len);
- in.read(reinterpret_cast<char *>(dest.data()), len);
-}
-
-template <typename T>
-void randomData(nnfw::misc::RandomGenerator &randgen, std::vector<uint8_t> &dest)
-{
- size_t elements = dest.size() / sizeof(T);
- assert(dest.size() % sizeof(T) == 0);
-
- std::vector<T> vec(elements);
- for (uint64_t i = 0; i < elements; i++)
- {
- vec[i] = randgen.generate<T>();
- }
- memcpy(dest.data(), vec.data(), elements * sizeof(T));
-}
-
-void randomBoolData(nnfw::misc::RandomGenerator &randgen, std::vector<uint8_t> &dest)
-{
- size_t elements = dest.size();
- std::vector<uint8_t> vec(elements);
- for (uint64_t i = 0; i < elements; i++)
- {
- bool value = randgen.generate<bool>();
- dest[i] = value ? 1 : 0;
- }
-}
-
-inline uint64_t num_elems(const nnfw_tensorinfo *ti)
-{
- uint64_t n = 1;
- for (uint32_t i = 0; i < ti->rank; ++i)
- {
- n *= ti->dims[i];
- }
- return n;
-}
-
-inline size_t sizeOfNnfwType(NNFW_TYPE type)
-{
- switch (type)
- {
- case NNFW_TYPE_TENSOR_BOOL:
- case NNFW_TYPE_TENSOR_UINT8:
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
- return 1;
- case NNFW_TYPE_TENSOR_FLOAT32:
- case NNFW_TYPE_TENSOR_INT32:
- return 4;
- case NNFW_TYPE_TENSOR_INT64:
- return 8;
- default:
- throw std::runtime_error{"Invalid tensor type"};
- }
-}
-
-template <typename T>
-bool compareBuffersExact(const T *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
-{
- bool match = true;
- for (uint32_t e = 0; e < act_buf.size() / sizeof(T); e++)
- {
- T ref = ref_buf[e];
- T act = reinterpret_cast<const T *>(act_buf.data())[e];
-
- if (ref != act)
- {
- std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
- << ", act: " << act << std::endl;
- match = false;
- }
- }
-
- return match;
-}
-
-bool compareBuffersExactBool(const uint8_t *ref_buf, const std::vector<uint8_t> &act_buf,
- uint32_t index)
-{
- bool match = true;
- for (uint32_t e = 0; e < act_buf.size() / sizeof(uint8_t); e++)
- {
- uint8_t ref_raw = ref_buf[e];
- bool ref = (ref_raw != 0 ? true : false);
- uint8_t act_raw = reinterpret_cast<const uint8_t *>(act_buf.data())[e];
- bool act = (act_raw != 0 ? true : false);
- if (ref != act)
- {
- std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
- << ", act: " << act << std::endl;
- match = false;
- }
- }
-
- return match;
-}
-
-int main(const int argc, char **argv)
-{
- TFLiteRun::Args args(argc, argv);
-
- auto tflite_file = args.getTFLiteFilename();
- auto data_files = args.getDataFilenames();
-
- if (tflite_file.empty())
- {
- args.print(argv);
- return RUN_FAILED;
- }
-
- std::cout << "[Execution] Stage start!" << std::endl;
- // Loading
- nnfw_session *onert_session = nullptr;
- NNFW_ASSERT_FAIL(nnfw_create_session(&onert_session), "[ ERROR ] Failure during model load");
- if (onert_session == nullptr)
- {
- std::cerr << "[ ERROR ] Failure to open session" << std::endl;
- exit(-1);
- }
-
- NNFW_ASSERT_FAIL(nnfw_load_model_from_modelfile(onert_session, tflite_file.c_str()),
- "[ ERROR ] Failure during model load");
-
- uint32_t num_inputs;
- uint32_t num_outputs;
- NNFW_ASSERT_FAIL(nnfw_input_size(onert_session, &num_inputs),
- "[ ERROR ] Failure during get model inputs");
- NNFW_ASSERT_FAIL(nnfw_output_size(onert_session, &num_outputs),
- "[ ERROR ] Failure during get model outputs");
-
- std::cout << "[Execution] Model is deserialized!" << std::endl;
-
- // Compile
- nnfw_prepare(onert_session);
-
- std::cout << "[Execution] Model compiled!" << std::endl;
-
- // Prepare input/output data
- std::vector<std::vector<uint8_t>> inputs(num_inputs);
- std::vector<std::vector<uint8_t>> outputs(num_outputs);
-
- bool generate_data = data_files.empty();
- bool read_data = data_files.size() == num_inputs;
- if (!generate_data && !read_data)
- {
- std::cerr << "[ ERROR ] "
- << "Wrong number of input files." << std::endl;
- exit(1);
- }
-
- const int seed = 1; /* TODO Add an option for seed value */
- nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
-
- for (uint32_t i = 0; i < num_inputs; i++)
- {
- nnfw_tensorinfo ti_input;
- NNFW_ASSERT_FAIL(nnfw_input_tensorinfo(onert_session, i, &ti_input),
- "[ ERROR ] Failure during get input data info");
- size_t input_size = num_elems(&ti_input) * sizeOfNnfwType(ti_input.dtype);
-
- inputs[i].resize(input_size);
-
- if (generate_data)
- {
- switch (ti_input.dtype)
- {
- case NNFW_TYPE_TENSOR_BOOL:
- randomBoolData(randgen, inputs[i]);
- break;
- case NNFW_TYPE_TENSOR_UINT8:
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
- randomData<uint8_t>(randgen, inputs[i]);
- break;
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
- randomData<int8_t>(randgen, inputs[i]);
- break;
- case NNFW_TYPE_TENSOR_FLOAT32:
- randomData<float>(randgen, inputs[i]);
- break;
- case NNFW_TYPE_TENSOR_INT32:
- randomData<int32_t>(randgen, inputs[i]);
- break;
- case NNFW_TYPE_TENSOR_INT64:
- randomData<uint64_t>(randgen, inputs[i]);
- break;
- default:
- std::cerr << "[ ERROR ] "
- << "Unspported input data type" << std::endl;
- exit(-1);
- break;
- }
- }
- else /* read_data */
- readData(data_files[i], inputs[i]);
-
- NNFW_ASSERT_FAIL(nnfw_set_input(onert_session, i, ti_input.dtype, inputs[i].data(), input_size),
- "[ ERROR ] Failure to set input tensor buffer");
- }
-
- std::cout << "[Execution] Input data is defined!" << std::endl;
-
- for (uint32_t i = 0; i < num_outputs; i++)
- {
- nnfw_tensorinfo ti_output;
- NNFW_ASSERT_FAIL(nnfw_output_tensorinfo(onert_session, i, &ti_output),
- "[ ERROR ] Failure during get output tensor info");
-
- uint64_t output_elements = num_elems(&ti_output);
- size_t output_size = output_elements * sizeOfNnfwType(ti_output.dtype);
- outputs[i].resize(output_size);
-
- NNFW_ASSERT_FAIL(
- nnfw_set_output(onert_session, i, ti_output.dtype, outputs[i].data(), output_size),
- "[ ERROR ] Failure to set output tensor buffer");
- }
-
- // Execute
- NNFW_ASSERT_FAIL(nnfw_run(onert_session), "[Execution] Can't execute");
-
- std::cout << "[Execution] Done!" << std::endl;
-
- // Compare with tflite
- std::cout << "[Comparison] Stage start!" << std::endl;
- // Read tflite model
- StderrReporter error_reporter;
- auto model = FlatBufferModel::BuildFromFile(tflite_file.c_str(), &error_reporter);
-
- BuiltinOpResolver resolver;
- InterpreterBuilder builder(*model, resolver);
-
- std::unique_ptr<Interpreter> interpreter;
- try
- {
- TFLITE_ENSURE(builder(&interpreter));
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << std::endl;
- exit(FILE_ERROR);
- }
- interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(-1));
-
- auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
- sess->prepare();
- // Set input and run
- for (uint32_t i = 0; i < num_inputs; i++)
- {
- auto input_tensor = interpreter->tensor(interpreter->inputs().at(i));
- memcpy(input_tensor->data.uint8, inputs[i].data(), inputs[i].size());
- }
- if (!sess->run())
- {
- std::cout << "[Comparison] TFLite run failed!" << std::endl;
- assert(0 && "Run failed!");
- }
- std::cout << "[Comparison] TFLite run done!" << std::endl;
-
- // Calculate max difference over all outputs
- float max_float_difference = 0.0f;
- bool find_unmatched_output = false;
-
- for (uint32_t out_idx = 0; out_idx < num_outputs; out_idx++)
- {
- nnfw_tensorinfo ti;
- nnfw_output_tensorinfo(onert_session, out_idx, &ti);
-
- bool matched = true;
- // Check output tensor values
-
- const auto &ref_output = interpreter->tensor(interpreter->outputs().at(out_idx))->data;
- const auto &output = outputs[out_idx];
-
- switch (ti.dtype)
- {
- case NNFW_TYPE_TENSOR_BOOL:
- matched = compareBuffersExactBool(ref_output.uint8, output, out_idx);
- break;
- case NNFW_TYPE_TENSOR_UINT8:
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
- matched = compareBuffersExact<uint8_t>(ref_output.uint8, output, out_idx);
- break;
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
- matched = compareBuffersExact<int8_t>(ref_output.int8, output, out_idx);
- break;
- case NNFW_TYPE_TENSOR_INT32:
- matched = compareBuffersExact<int32_t>(ref_output.i32, output, out_idx);
- break;
- case NNFW_TYPE_TENSOR_FLOAT32:
- // TODO better way for handling FP error?
- for (uint32_t e = 0; e < num_elems(&ti); e++)
- {
- float refval = ref_output.f[e];
- float val = reinterpret_cast<const float *>(output.data())[e];
- if (std::abs(refval - val) > max_float_difference)
- max_float_difference = std::abs(refval - val);
-
- if (max_float_difference > DIFFERENCE_THRESHOLD)
- matched = false;
- }
- break;
- case NNFW_TYPE_TENSOR_INT64:
- matched = compareBuffersExact<int64_t>(ref_output.i64, output, out_idx);
- break;
- default:
- throw std::runtime_error{"Invalid tensor type"};
- }
-
- if (!matched)
- find_unmatched_output = true;
- }
-
- // Print results
- std::cout << "[Comparison] Max float difference: " << max_float_difference << std::endl;
- int ret = 0;
- if (find_unmatched_output)
- {
- std::cout << "[Comparison] outputs is not equal!" << std::endl;
- if (max_float_difference > DIFFERENCE_THRESHOLD)
- {
- std::cout << "[Comparison] Float outputs is not equal!" << std::endl;
- }
- ret = 1;
- }
- else
- {
- std::cout << "[Comparison] Outputs is equal!" << std::endl;
- }
- std::cout << "[Comparison] Done!" << std::endl;
-
- nnfw_close_session(onert_session);
-
- return ret;
-}
#include "tflite/Diff.h"
#include "tflite/Assert.h"
#include "tflite/Session.h"
+#include "tflite/RandomInputInitializer.h"
#include "tflite/InterpreterSession.h"
#include "tflite/NNAPISession.h"
#include "misc/tensor/IndexIterator.h"
}
};
-} // namespace anonymous
+} // namespace
int main(const int argc, char **argv)
{
BuiltinOpResolver resolver;
InterpreterBuilder builder(*model, resolver);
TFLITE_ENSURE(builder(&interpreter))
- interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(-1));
+ interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(1));
});
}
catch (const std::exception &e)
const int seed = 1; /* TODO Add an option for seed value */
nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
- // No input specified. So we fill the input tensors with random values.
- for (const auto &o : interpreter->inputs())
- {
- TfLiteTensor *tensor = interpreter->tensor(o);
- if (tensor->type == kTfLiteInt32)
- {
- // Generate singed 32-bit integer (s32) input
- auto tensor_view = nnfw::tflite::TensorView<int32_t>::make(*interpreter, o);
-
- int32_t value = 0;
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- // Gather operation: index should be within input coverage.
- tensor_view.at(ind) = value;
- value++;
- };
- }
- else if (tensor->type == kTfLiteUInt8)
- {
- // Generate unsigned 8-bit integer input
- auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, o);
-
- auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<uint8_t>);
- const nnfw::misc::tensor::Object<uint8_t> data(tensor_view.shape(),
- std::bind(fp, randgen, _1, _2));
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
- tensor_view.at(ind) = value;
- };
- }
- else if (tensor->type == kTfLiteBool)
- {
- // Generate bool input
- auto tensor_view = nnfw::tflite::TensorView<bool>::make(*interpreter, o);
-
- auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
- const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(),
- std::bind(fp, randgen, _1, _2));
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
- tensor_view.at(ind) = value;
- };
- }
- else
- {
- assert(tensor->type == kTfLiteFloat32);
-
- const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes);
- for (float *ptr = tensor->data.f; ptr < end; ptr++)
- {
- *ptr = randgen.generate<float>();
- }
- }
- }
+ RandomInputInitializer initializer{randgen};
+ initializer.run(*(interpreter.get()));
}
TFLiteRun::TensorDumper tensor_dumper;
set(BUILD_TENSORFLOW_LITE_2_3_0 ON)
endif()
-nnfw_find_package(TensorFlowLite-2.3.0 REQUIRED)
+nnfw_find_package(TensorFlowLite EXACT 2.3.0 REQUIRED)
nnfw_find_package(Boost REQUIRED)
list(APPEND TFLITE_RUN_SRCS "src/tflite_vanilla_run.cc")
}
};
-} // namespace anonymous
+} // namespace
int main(const int argc, char **argv)
{
+++ /dev/null
-../.clang-format.8
\ No newline at end of file
+++ /dev/null
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-updates main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-updates main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-backports main restricted
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-backports main restricted
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-security main restricted universe multiverse
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-security main restricted universe multiverse
+++ /dev/null
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-updates main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-updates main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-backports main restricted
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-backports main restricted
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-security main restricted universe multiverse
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-security main restricted universe multiverse
{
echo "Usage: $0 [BuildArch] [LinuxCodeName] [--setproxy=IP] [--skipunmount]"
echo "BuildArch can be: arm(default), aarch64 and armel"
- echo "LinuxCodeName - optional, Code name for Linux, can be: bionic(default), trusty, xenial, focal"
+ echo "LinuxCodeName - optional, Code name for Linux, can be: xenial, bionic(default), focal"
echo " If BuildArch is armel, this can be tizen(default)"
echo "--setproxy=IP - optional, IP is the proxy server IP address or url with portnumber"
echo " default no proxy. Example: --setproxy=127.1.2.3:8080"
__UbuntuRepo=
__LinuxCodeName=
;;
- trusty)
- __LinuxCodeName=trusty
- ;;
xenial)
__LinuxCodeName=xenial
;;
if this_dtype == tf.uint8:
input_values.append(
np.random.randint(0, 255, this_shape).astype(np.uint8))
+ if this_dtype == tf.int8:
+ input_values.append(
+ np.random.randint(-127, 127, this_shape).astype(np.int8))
elif this_dtype == tf.float32:
input_values.append(
np.random.random_sample(this_shape).astype(np.float32))
if this_dtype == np.uint8:
input_values.append(
np.random.randint(0, 255, this_shape).astype(np.uint8))
+ if this_dtype == np.int8:
+ input_values.append(
+ np.random.randint(-127, 127, this_shape).astype(np.int8))
elif this_dtype == np.float32:
input_values.append(
np.random.random_sample(this_shape).astype(np.float32))
# dump input and output in h5
import h5py
- supported_dtypes = ("float32", "uint8", "bool", "int32", "int64")
+ supported_dtypes = ("float32", "uint8", "int8", "bool", "int32", "int64")
h5dtypes = {
"float32": ">f4",
"uint8": "u1",
+ "int8": "i1",
"bool": "u1",
"int32": "int32",
"int64": "int64"
--- /dev/null
+This folder contains the necessary scripts to perform a pareto front estimation for machine learning models. Currently, the scripts support target devices running on Tizen, as well as `Odroid-XU4`.
+
+The contents of the folder can be categorized into the following groups:
+
+- [Generator scripts to map decision variables to `nnpackage_run` parameters](#mapping-decision-to-parameters)
+- [Estimator scripts to compute pareto front](#pareto-estimation)
+
+The following subsections describe the role of each script in detail.
+
+## Mapping Decision to Parameters
+The generator script `gen_oplist.py` is located under the `generator` folder and encodes `nnpackage` backend assignments as large integers. Effectively, it maps suitable backend assignments to integer values. For example, a graph with only three operations and two backends will have an integer representation in the range `(0, 7)`. Thus, a value `0` might imply that all operations run on the `cpu` backend, while `7` might imply that all operations run on the `acl_cl` backend. As will be described below, the integer representation of `nnpackage` parameters serves as a convenient decision space for pareto estimation.
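+
+To make the encoding concrete, the following minimal sketch decodes such an integer into a per-operation backend assignment. The operation list and backend names here are illustrative placeholders, not values read from a real model:
+
+```
+# Illustration only: decode an assignment ID into per-operation backends.
+oplist = ["Conv2D", "DepthwiseConv2D", "Pool2D"]  # 3 operations (placeholder)
+backends = ["cpu", "acl_cl"]                      # 2 backends -> IDs in (0, 7)
+
+def decode(assignment_id):
+    mapping = {}
+    for op in oplist:
+        mapping[op] = backends[assignment_id % len(backends)]
+        assignment_id //= len(backends)
+    return mapping
+
+print(decode(0))  # every operation on cpu
+print(decode(7))  # every operation on acl_cl
+```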
+
+Setting up parameters for `nnpackage_run` requires knowledge of model-specific operations. To this end, the `gen_oplist.py` script generates, for each model, an `oplist` of unique operations. If an exhaustive mapping of backends to operation sequences is preferred, then `gen_oplist.py` also generates a so-called `opmap` list of uniquely observed `<operation name, data size>` pairs.
+
+`gen_oplist.py` is run on the development environment (read: *Desktop PC*) as shown below:
+```
+python3 gen_oplist.py <tflite model> <target>
+```
+
+The list of model operations and their mapping to graph node indexes are stored in an *oplist.json* file and transferred to the target device. For further details about usage, type `python3 gen_oplist.py --help`.
+
+## Pareto Estimation
+Scripts under the `estimator` folder fall into two categories, namely [exhaustive, brute-force profiling](#exhaustive-profiling) and an [on-device version of pareto estimation](#on-device-pareto-estimation). These are described in detail below.
+
+### Exhaustive Profiling
+For the sake of testing several pareto estimation algorithms *offline* on common lookup data, the `estimator` folder includes `brute_force_profiler.py`, which records all solutions in the decision *or* assignment space. `brute_force_profiler.py` is typically run on the target device, with the following syntax:
+
+```
+python brute_force_profiler.py <model> <run_folder> [--mode=<mode>] [--dumpfile=<filename>]
+```
+For details, type `python brute_force_profiler.py --help`. Below is an example of the dump generated by the brute-force profiler:
+
+```
+{"oplist": ["Pool2D", "BinaryArithmetic", "DepthwiseConv2D", "Conv2D", "Reshape"],
+ "solutions": [
+ {"memory": 56388, "id": 0, "time": 72.525},
+ {"memory": 63624, "id": 1, "time": 86.532},
+ {"memory": 64320, "id": 2, "time": 69.352},
+ {"memory": 65376, "id": 3, "time": 76.436},
+ {"memory": 73016, "id": 4, "time": 69.634},
+ {"memory": 73492, "id": 5, "time": 47.013},
+ {"memory": 74488, "id": 6, "time": 95.01},
+ {"memory": 74844, "id": 7, "time": 111.329},
+ {"memory": 393324, "id": 8, "time": 98.956},
+ {"memory": 395088, "id": 9, "time": 103.24},
+ {"memory": 396180, "id": 10, "time": 68.107},
+ {"memory": 395932, "id": 11, "time": 86.109},
+ {"memory": 402468, "id": 12, "time": 25.477},
+ {"memory": 402800, "id": 13, "time": 25.42},
+ {"memory": 403904, "id": 14, "time": 9.168},
+ {"memory": 404476, "id": 15, "time": 7.801},
+....
+ {"memory": 403940, "id": 30, "time": 9.145},
+ {"memory": 403568, "id": 31, "time": 8.034}]}
+```
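+
+Such a dump also makes offline sanity checks easy. As a purely illustrative sketch (not one of the estimator scripts), the non-dominated `(time, memory)` points can be extracted from a dump like the one above as follows; the file path is only an example:
+
+```
+# Illustration only: extract the pareto front from a brute-force dump.
+import json
+
+with open("/tmp/brute_force_dump.json") as f:  # example path, not a fixed location
+    solutions = json.load(f)["solutions"]
+
+# Keep only the points that no other solution strictly beats on both axes
+pareto = [s for s in solutions
+          if not any(o["time"] < s["time"] and o["memory"] < s["memory"]
+                     for o in solutions)]
+print(sorted(pareto, key=lambda s: s["time"]))
+```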
+
+**Note**: At present, the pareto estimation algorithms run on-device; an *offline* mode will be supported in the near future.
+
+### On Device Pareto Estimation
+Currently, the `estimator` folder includes only `random_sampler.py`; in the future, it will feature a set of pareto estimation algorithms. Regardless of the algorithm, the following steps must be carried out in sequence:
+
+1. Generate the oplist using `gen_oplist.py`, and transfer the JSON file to the target device. This step is performed on the development environment.
+
+2. Copy the contents of the `estimator` folder to the target (*scp* for odroid, *sdb push* for tizen), at a preferred location.
+
+3. On the target device, run the pareto-estimation algorithm. The following example shows how to run `random_sampler.py` (see `python random_sampler.py --help` for details):
+```
+python random_sampler.py /root/img_model/mobilenetv2/ /opt/usr/nnfw-test/Product/out/bin --mode=name --dumpfile=/tmp/mobilenetv2_opname_profile.json --iterations=20
+```
+After profiling, the results can be viewed under the filename provided by the `--dumpfile` argument. Below is an illustrative example of the same model that was brute-forced above:
+
+```
+{"configs": {
+ "4": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=cpu ",
+ "10": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=cpu OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=cpu ",
+ "14": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=cpu ",
+ "16": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=cpu OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+ "20": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+ "21": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=acl_cl OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+ "31": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=acl_cl OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=acl_cl "},
+ "oplist": ["Pool2D", "DepthwiseConv2D", "Reshape", "Conv2D", "BinaryArithmetic"],
+ "solutions": [
+ {"exec_time": 76.138, "max_rss": 62712, "id": 4},
+ {"exec_time": 72.719, "max_rss": 65272, "id": 16},
+ {"exec_time": 22.409, "max_rss": 403120, "id": 14},
+ {"exec_time": 28.138, "max_rss": 403064, "id": 10},
+ {"exec_time": 70.656, "max_rss": 65536, "id": 20},
+ {"exec_time": 68.805, "max_rss": 66076, "id": 21},
+ {"exec_time": 8.201, "max_rss": 404656, "id": 31}], "mode": "name"}
+```
+**Note**: The pareto-estimation algorithms require the Python `numpy` package, so make sure to install it beforehand.
+
+
+
+
--- /dev/null
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import sys
+import Queue
+import utils
+import signal
+from pareto import ParetoData
+
+
+class Hlps:
+ """
+ Initialize Runner and Pareto data structure
+ """
+
+ def __init__(self, runner, num_backends, num_samples):
+ self._runner = runner
+ self._num_backends = num_backends
+ self._num_samples = num_samples
+ self._marked = {}
+ self._extended_search = False
+ self._iteration = 0
+ self._pareto_obj = ParetoData()
+
+ """
+ Method to generate new samples from a given sample v_vec.
+ The new samples bear a hamming distance hd from the provided sample.
+ """
+
+ def gen_hamming(self, v_vec, hd=1, nsamples=None):
+ if nsamples is None:
+ nsamples = self._num_backends - 1
+ ret = np.zeros((nsamples, len(v_vec)), dtype=int)
+ v = v_vec
+ marked = np.full(len(v), False, dtype=bool)
+ cnt = 0
+
+ for r in range(nsamples):
+ ret[r] = v
+ rnd_pos = np.random.permutation(range(len(v)))
+ for i in range(hd):
+ pos = rnd_pos[i]
+ marked[pos] = True
+ for r in range(nsamples):
+ ret[r][pos] = (v[pos] - r - 1) % self._num_backends
+
+ return ret
+
+ """
+ Method to generate all samples from a given sample v_vec, that
+ have a hamming distance of one with respect to it.
+ """
+
+ def gen_hamming_one(self, v_vec, invert=False):
+ ret = np.zeros(((self._num_backends - 1) * len(v_vec), len(v_vec)), dtype=int)
+ if invert == False:
+ v = v_vec
+ else:
+ v = [1 - x for x in v_vec]
+ for nb in range(1, self._num_backends):
+ c = 0
+ for r in range((nb - 1) * len(v), nb * len(v)):
+ ret[r] = v
+ ret[r][c] = (v[c] - nb) % self._num_backends
+ c += 1
+ return ret
+
+ """
+ Enable profiling over extended search space
+ """
+
+ def enable_extended_search(self):
+ self._extended_search = True
+ for key in self._pareto_obj.get_pareto_keys():
+ config = self._pareto_obj.get_config(key)
+ extended_val = self._runner.get_extended_solution(config)
+ self._pareto_obj.set_config(key, extended_val)
+ self._iteration = 0
+
+ """
+ HLPS algorithm implementation provided here.
+ Description: Starting with a random sample, fill up a sampling
+ queue with hamming neighbors. Fetch samples from queue,
+ each time checking for pareto optimality. Pareto-optimal samples
+ are then explored/exploited to generate new samples that are added to the queue.
+ Algorithm phase terminates when the queue is empty.
+    Repeat this phase in a multi-shot invocation for better results.
+ """
+
+ def hlps_routine(self, config_ids):
+ # Initialize
+ solution_q = Queue.Queue()
+ visited = {}
+ nbits = self._runner.get_nbits(self._extended_search)
+ is_extended = self._runner.get_mode_extended()
+ nsolutions = self._num_backends**nbits
+
+ stop_insert = False
+
+ cnt = 0
+ q_add_cnt = 0
+ round_cnt = 0
+
+ def extended_solution(s):
+ return self._runner.get_extended_solution(s)
+
+ def mark_solution(s):
+ if is_extended == True and self._extended_search == False:
+ self._marked[extended_solution(s)] = True
+ else:
+ self._marked[s] = True
+
+ def is_marked(s):
+ if is_extended == True and self._extended_search == False:
+ return (extended_solution(s) in self._marked)
+ else:
+ return (s in self._marked)
+
+ def visit_solution(s):
+ if is_extended == True and self._extended_search == False:
+ visited[extended_solution(s)] = True
+ else:
+ visited[s] = True
+
+ def is_visited(s):
+ if is_extended == True and self._extended_search == False:
+ return (extended_solution(s) in visited)
+ else:
+ return (s in visited)
+
+ def sigint_handler(signum, frame):
+ print("Round cnt = ", round_cnt)
+
+ signal.signal(signal.SIGINT, sigint_handler)
+ if len(config_ids) > 0:
+ for solution in config_ids:
+ if is_extended == True and self._extended_search == True and self._iteration == 0:
+ s = extended_solution(solution)
+ else:
+ s = solution
+ s_vec = utils.int_to_vec(s, self._num_backends, nbits)
+
+ candidate = self.gen_hamming_one(s_vec)
+ for hd in range((self._num_backends - 1) * nbits):
+ candidate_int = int(''.join(str(x) for x in reversed(candidate[hd])),
+ self._num_backends)
+ if is_marked(candidate_int) == False:
+ solution_q.put(candidate_int)
+ mark_solution(candidate_int)
+ q_add_cnt += 1
+ else:
+ start_seed = int(np.random.rand() * (nsolutions))
+ solution_q.put(start_seed)
+ q_add_cnt += 1
+
+ self._iteration += 1
+ # Main routine
+ while not solution_q.empty():
+ s = solution_q.get()
+ mark_solution(s)
+ stop_insert = False
+ if (round_cnt % 100 == 0):
+ print("sample count = ", round_cnt)
+ if self._extended_search == True:
+ print("Queue size is ", solution_q.qsize())
+
+ if is_extended == True and self._extended_search == False:
+ time_val, memory_val = self._runner.profile_by_opname(s)
+ elif is_extended == True:
+ time_val, memory_val = self._runner.profile_by_opindex(s)
+ else:
+ time_val, memory_val = self._runner.profile_by_opname(s)
+ round_cnt += 1
+
+ utils.progressbar(round_cnt, nsolutions, prefix="% samples computed. : ")
+ self._pareto_obj.update_pareto_solutions(
+ s, time_val, memory_val, explore_flag=True)
+
+ for key in self._pareto_obj.get_pareto_keys():
+ pareto_sample = self._pareto_obj.get_config(key)
+ explore_sample = self._pareto_obj.get_exploration(key)
+
+ if is_visited(pareto_sample):
+ continue
+ visit_solution(pareto_sample)
+ s_vec = utils.int_to_vec(pareto_sample, self._num_backends, nbits)
+
+ if explore_sample == True:
+ # Explore solutions over a larger range
+ for hd in range(1, nbits + 1):
+ if stop_insert is True:
+ break
+
+ candidate = self.gen_hamming(s_vec, hd=hd)
+ for i in range(self._num_backends - 1):
+ if stop_insert is True:
+ break
+ candidate_int = int(
+ ''.join(str(x) for x in reversed(candidate[i])),
+ self._num_backends)
+ try:
+ if is_marked(candidate_int) == False:
+ solution_q.put(candidate_int)
+ q_add_cnt += 1
+ except IndexError:
+ print("candidate[i] = ", candidate[i],
+ ', candidate_int = ', candidate_int)
+ sys.exit(-1)
+ if (q_add_cnt >= self._num_samples):
+ print("Queue full in explore")
+ stop_insert = True
+ else:
+ # Exploit solutions within immediate neighborhood
+ candidate = self.gen_hamming_one(s_vec)
+
+ for j in range((self._num_backends - 1) * nbits):
+ if stop_insert is True:
+ break
+ candidate_int = int(
+ ''.join(str(x) for x in reversed(candidate[j])),
+ self._num_backends)
+ if is_marked(candidate_int) == False:
+ solution_q.put(candidate_int)
+ q_add_cnt += 1
+ if (q_add_cnt >= self._num_samples):
+ print("Queue full in exploit")
+ stop_insert = True
+ self._pareto_obj.set_exploration(key)
+
+ pfront = set([
+ self._pareto_obj.get_config(key)
+ for key in self._pareto_obj.get_pareto_keys()
+ ])
+ return pfront, q_add_cnt
+
+ """
+ Method to dump results from HLPS
+ """
+
+ def dump_results(self, dumpdata):
+ dumpdata = self._pareto_obj.dump_pareto_solutions(dumpdata)
+ dumpdata = self._runner.dump_config(dumpdata)
+ return dumpdata
--- /dev/null
+#! /usr/bin/python
+import argparse
+import json
+import sys
+from profile_args import ProfileArgs
+from runner import Runner
+from utils import progressbar
+
+if __name__ == "__main__":
+ parser = ProfileArgs(
+ prog="brute_force_profiler.py", description="Profiles nnpackage_run using oplist")
+ # Parse arguments
+ args = parser.parse_args()
+ modelfile = args.model
+ mode = args.mode
+ n_backends = args.backends
+ dumpfile = args.dumpfile
+
+ # Initialize a runner for given model and target
+ runner = Runner(args.model, args.run_folder, args.backends, args.mode)
+ nruns = runner.get_solution_spacelen()
+ profile_results = {}
+ profile_results['solutions'] = []
+ chk_ptr = 0
+
+ # Profile each backend setting, record execution time and peak memory
+ for r in range(nruns):
+ if (r % 100) == 0:
+ # Checkpointing results, in case the runs take too long
+ if chk_ptr > 0:
+ with open("/tmp/solutions.json") as ifile:
+ tmp_results = json.load(ifile)
+
+ with open("/tmp/solutions.json", "w") as ofile:
+ json.dump(tmp_results + profile_results['solutions'][chk_ptr:], ofile)
+ else:
+ with open("/tmp/solutions.json", "w") as ofile:
+ json.dump(profile_results['solutions'], ofile)
+ chk_ptr = r
+
+ if args.mode == "name":
+ exec_time, max_rss = runner.profile_by_opname(r)
+ elif args.mode == "index":
+ exec_time, max_rss = runner.profile_by_opindex(r)
+ else:
+ print("Invalid mode ", mode)
+ sys.exit(-1)
+
+ profile_results['solutions'].append({
+ "time": exec_time,
+ "memory": max_rss,
+ "id": r
+ })
+ progressbar(r, nruns, prefix="% samples computed. : ")
+ progressbar(nruns, nruns, prefix="% samples computed. : ")
+
+ oplist, opmap, opname_by_indx = runner.get_opconfig()
+
+ if args.mode == "index":
+ profile_results['oplist'] = oplist
+ profile_results['opmap'] = opmap
+ profile_results['opname_by_indx'] = opname_by_indx
+ elif args.mode == "name":
+ profile_results['oplist'] = oplist
+ else:
+ print("Invalid mode ", mode)
+ sys.exit(-1)
+
+ with open(dumpfile, "w") as ofile:
+ json.dump(profile_results, ofile)
+    print("\nDone..")
--- /dev/null
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import utils
+import sys
+import json
+import time
+from Hlps import Hlps
+from profile_args import ProfileArgs
+from runner import Runner
+
+
+def hlps_profiler(modelfile,
+ run_folder,
+ num_backends=2,
+ mode="name",
+ nruns=3,
+ num_samples=2000,
+ dumpfile=None):
+ runner = Runner(modelfile, run_folder, num_backends, mode=mode)
+ hlps = Hlps(runner, num_backends=num_backends, num_samples=num_samples)
+
+ config_set = set()
+ sample_cnt = 0
+ total_reject_list = []
+
+ for r in range(nruns):
+ config_set, sample_cnt_iter = hlps.hlps_routine(config_set)
+ sample_cnt += sample_cnt_iter
+
+    # Extended search over the op-index space applies only in "index" mode
+    if mode == "index":
+        print("Starting search over extended space")
+        print("\n")
+        hlps.enable_extended_search()
+        for r in range(nruns):
+            config_set, sample_cnt_iter = hlps.hlps_routine(config_set)
+            sample_cnt += sample_cnt_iter
+
+ # Export results to json file
+ # Dump profiler results
+ dumpdata = {}
+    dumpdata['mode'] = mode
+ dumpdata['sample_cnt'] = sample_cnt
+ dumpdata = hlps.dump_results(dumpdata)
+ with open(dumpfile, "w") as ofile:
+ json.dump(dumpdata, ofile)
+
+
+if __name__ == "__main__":
+ t_start = time.time()
+ parser = ProfileArgs(
+ "hlps_on_device.py",
+ description="On-Device Optimizing Profiler for TensorFlowLite Models")
+ parser.add_argument(
+ '--iterations',
+ type=int,
+ default=3,
+ help='Number of iterations, less than 10 should be enough')
+ parser.add_argument(
+ '--samples', type=int, default=2000, help='Number of samples per iteration')
+ parser.add_argument(
+ '--offline',
+ type=bool,
+ default=False,
+ help='Set to True for running over profiled data')
+ parser.add_argument('--profiled_data', type=str, help='Profile file with path')
+
+ args = parser.parse_args()
+
+ hlps_profiler(
+ args.model,
+ args.run_folder,
+ num_backends=args.backends,
+ mode=args.mode,
+ nruns=args.iterations,
+ num_samples=args.samples,
+ dumpfile=args.dumpfile)
+ t_end = time.time()
+ with open(args.dumpfile, "r") as ifile:
+ dumpdata = json.load(ifile)
+ dumpdata['profiling time'] = (t_end - t_start)
+ with open(args.dumpfile, "w") as ofile:
+ json.dump(dumpdata, ofile)
+ print("done.., profiling time = ", (t_end - t_start), " seconds")
--- /dev/null
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class ParetoData:
+ def __init__(self):
+ self._pareto_solutions = {}
+ self._configs = {}
+ self._cnt = 0
+ self._explore = {}
+
+ def add_pareto_entry(self,
+ sample,
+ exec_time,
+ max_rss,
+ key,
+ explore_flag,
+ check_one_hop=True):
+ self._pareto_solutions[key] = [exec_time, max_rss]
+ self._configs[key] = sample
+ if explore_flag == True and check_one_hop == True:
+ self._explore[key] = False
+ elif explore_flag == True and check_one_hop == False:
+ self._explore[key] = True
+
+ def update_pareto_solutions(self, sample, exec_time, max_rss, explore_flag=False):
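+        # Keep a new sample only if no stored entry strictly dominates it
+        # (strictly lower exec_time and strictly lower max_rss); any stored
+        # entry that the new sample strictly dominates is overwritten in place.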
+ new_item = True
+ if self._pareto_solutions:
+ for key in list(self._pareto_solutions):
+ if self._pareto_solutions[key][0] < exec_time and self._pareto_solutions[key][1] < max_rss:
+ new_item = False
+ break
+ elif self._pareto_solutions[key][0] > exec_time and self._pareto_solutions[key][1] > max_rss:
+ self.add_pareto_entry(sample, exec_time, max_rss, key, explore_flag,
+ True)
+ new_item = False
+
+ if new_item is True:
+ self.add_pareto_entry(sample, exec_time, max_rss, self._cnt, explore_flag,
+ False)
+ self._cnt += 1
+
+ def dump_pareto_solutions(self, dumpdata):
+ marked = {}
+ pareto_results = []
+ for i in range(self._cnt):
+ if self._configs[i] not in marked:
+ marked[self._configs[i]] = True
+ pareto_results.append({
+ "id": self._configs[i],
+ "exec_time": self._pareto_solutions[i][0],
+ "max_rss": self._pareto_solutions[i][1]
+ })
+ dumpdata.update({"solutions": pareto_results})
+
+ return dumpdata
+
+ def get_pareto_keys(self):
+ return self._configs.keys()
+
+ def get_config(self, key):
+ return self._configs[key]
+
+ def get_exploration(self, key):
+ return self._explore[key]
+
+ def set_exploration(self, key):
+ self._explore[key] = True
+
+ def set_config(self, key, extended_value):
+ self._configs[key] = extended_value
--- /dev/null
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+
+
+class ProfileArgs(argparse.ArgumentParser):
+ def __init__(self, *args, **kwargs):
+        super(ProfileArgs, self).__init__(*args, **kwargs)
+ self.add_argument(
+ 'model', type=str, default=None, help='nnpackage name with path')
+ self.add_argument('run_folder', type=str, help="path to nnpackage_run executable")
+ self.add_argument(
+ '--mode',
+ type=str.lower,
+ choices=["index", "name"],
+ default="name",
+ help='Profile by operation index or name')
+ self.add_argument('--backends', type=int, default=2, help='Number of backends')
+ self.add_argument(
+ '--dumpfile',
+ type=str.lower,
+ default="/tmp/final_result.json",
+ help='JSON Dumpfile name with path')
--- /dev/null
+#! /usr/bin/python
+import argparse
+import json
+import numpy as np
+import sys
+import subprocess
+import time
+from pareto import ParetoData
+from profile_args import ProfileArgs
+from runner import Runner
+from utils import progressbar
+
+if __name__ == "__main__":
+ t_start = time.time()
+ parser = ProfileArgs("random_sampler.py", description="Random sampler")
+ parser.add_argument(
+ '--iterations', type=int, default=100, help='Number of iterations')
+
+ # Parse arguments
+ args = parser.parse_args()
+ dumpfile = args.dumpfile
+ iterations = args.iterations
+
+ # Initialize a runner and Pareto data structure obj
+ runner = Runner(args.model, args.run_folder, args.backends, args.mode)
+ pareto_obj = ParetoData()
+ # Initialize variables for random sampler
+ n_assignments = runner.get_solution_spacelen()
+ n_iterations = min(iterations, n_assignments)
+ chk_ptr = 0
+ marked_samples = {}
+
+ # Profile at random over solution space
+ for r in range(n_iterations):
+ random_sample = int(np.random.rand() * n_assignments)
+ while random_sample in marked_samples:
+ random_sample = int(np.random.rand() * n_assignments)
+ marked_samples[random_sample] = True
+ if args.mode == "name":
+ exec_time, max_rss = runner.profile_by_opname(random_sample)
+ elif args.mode == "index":
+ exec_time, max_rss = runner.profile_by_opindex(random_sample)
+ else:
+ print("Invalid mode ", mode)
+ sys.exit(-1)
+
+ pareto_obj.update_pareto_solutions(random_sample, exec_time, max_rss)
+ progressbar(r, n_assignments, prefix="% samples computed. : ")
+ progressbar(r + 1, n_assignments, prefix="% samples computed. : ")
+
+ # Dump profiler results
+ dumpdata = {}
+ dumpdata['mode'] = args.mode
+ dumpdata = pareto_obj.dump_pareto_solutions(dumpdata)
+ dumpdata = runner.dump_config(dumpdata)
+ with open(dumpfile, "w") as ofile:
+ json.dump(dumpdata, ofile)
+ t_end = time.time()
+ print("\n")
+ print("done.., profiling time = ", (t_end - t_start), " seconds")
--- /dev/null
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import numpy as np
+from utils import fetch_config_by_name
+from utils import fetch_config_by_indx
+from utils import generate_vars
+from utils import generate_vars_for_indx
+from utils import exec_shell
+from utils import import_configs
+from utils import int_to_vec
+import sys
+
+
+class Mapper:
+ def __init__(self, opmap, oplist, opname_by_index):
+ self._opmap = opmap
+ self._oplist = oplist
+ self._opname_by_indx = opname_by_index
+
+ def get_oplist(self):
+ return self._oplist
+
+ def get_opmap(self):
+ return self._opmap
+
+ def get_opname_by_indx(self):
+ return self._opname_by_indx
+
+ def get_indices(self, value):
+ indx_list = []
+ for i in range(len(self._opname_by_indx)):
+ if self._opname_by_indx[i] == value:
+ indx_list.append(i)
+ return indx_list
+
+ def map_to_extended_space(self, n, backends):
+ n_vec = int_to_vec(n, backends, len(self._oplist))
+ extended_vec = np.zeros(max(self._opmap) + 1, dtype=int)
+ cnt = 0
+
+ for allocation in n_vec:
+ extended_pos = list(
+ set([self._opmap[i] for i in self.get_indices(self._oplist[cnt])]))
+ try:
+ extended_vec[extended_pos] = allocation
+ except IndexError:
+ print("extended_vec size = ", extended_vec.size, ", extended_pos = ",
+ extended_pos)
+ cnt += 1
+        extended_n = int(''.join(str(i) for i in extended_vec[::-1]), backends)
+ return extended_n
+
+
+class Runner:
+ def __init__(self, model, run_folder, num_backends, mode):
+ self._model = model
+ self._run_folder = run_folder
+ self._mode = mode
+ oplist, opmap, opname_by_index = import_configs(mode)
+ self._mapper = Mapper(opmap, oplist, opname_by_index)
+ self._nbackends = num_backends
+ self._extended_map = {}
+
+ def get_solution_spacelen(self):
+ if self._mode == "name":
+ return self._nbackends**len(self._mapper.get_oplist())
+ elif self._mode == "index":
+ return self._nbackends**max(self._mapper.get_opmap())
+ else:
+ print("Unknown mode ", mode, ", exiting profiler")
+ sys.exit(-1)
+
+ def get_nbits(self, extended_search_mode):
+ if self._mode == "index" and extended_search_mode == True:
+ return max(self._mapper.get_opmap())
+ else:
+ return len(self._mapper.get_oplist())
+
+ def get_mode_extended(self):
+ return (self._mode == "index")
+
+ def get_extended_solution(self, s):
+ if s in self._extended_map:
+ return self._extended_map[s]
+
+ extended_value = self._mapper.map_to_extended_space(s, self._nbackends)
+ self._extended_map[s] = extended_value
+ return extended_value
+
+ def run_inference(self, solution):
+ cmd_str = [
+ ". /tmp/envvars.sh && " + self._run_folder + "/nnpackage_run -w1 -r1 -m1 -l "
+ + self._model + "/metadata/tc/input.h5 " + self._model + " 2> /dev/null"
+ ]
+ res = exec_shell(cmd_str, newline_split=True)
+ try:
+ exec_time = float(res[4].split(' ')[-2])
+ max_rss = int(res[13].split(' ')[-2])
+ except IndexError:
+ print("got index error at config ", solution)
+ print("result: ", res)
+ print("####")
+ sys.exit(-1)
+ return (exec_time, max_rss)
+
+ def profile_by_opname(self, solution):
+ generate_vars(self._mapper.get_oplist(), solution, self._nbackends)
+ return self.run_inference(solution)
+
+ def profile_by_opindex(self, solution):
+ generate_vars_for_indx(self._mapper.get_opmap(), solution, self._nbackends)
+ return self.run_inference(solution)
+
+ def get_opconfig(self):
+ return self._mapper.get_oplist(), self._mapper.get_opmap(
+ ), self._mapper.get_opname_by_indx()
+
+ def dump_config(self, dumpdata):
+ if self._mode == "name":
+ dumpdata.update({'oplist': self._mapper.get_oplist()})
+ elif self._mode == "index":
+ dumpdata.update({'oplist': self._mapper.get_opmap()})
+
+ configs = {}
+ for solution in dumpdata['solutions']:
+ if self._mode == "name":
+ configs[int(solution["id"])] = fetch_config_by_name(
+ dumpdata['oplist'], solution["id"], self._nbackends)
+ elif self._mode == "index":
+ configs[int(solution["id"])] = fetch_config_by_indx(
+ dumpdata['oplist'], solution["id"], self._nbackends)
+ dumpdata.update({'configs': configs})
+ return dumpdata
--- /dev/null
+#! /usr/bin/python
+import subprocess
+import numpy as np
+import sys
+import os
+import json
+"""
+ General executor for bash-like shell. Supports multiline results.
+"""
+
+
+def exec_shell(command_str, newline_split=False):
+ result = subprocess.Popen(command_str, shell=True, stdout=subprocess.PIPE)
+ out, err = result.communicate()
+ if (newline_split):
+ res = out.decode("utf-8").split('\n')
+ res = res[:-1]
+ return res
+ else:
+ return out.decode("utf-8").split("\n")[0]
+
+
+"""
+ Given a number and its base, return its symbol-wise vector representation
+"""
+
+
+def int_to_vec(n, b, n_operations):
+ number_arr = np.zeros(n_operations, dtype=int)
+ i = n_operations - 1
+ while (n != 0):
+ number_arr[i] = n % b
+ n = n // b
+ i -= 1
+
+ return number_arr[::-1]
+
+
+"""
+    Generate onert backend mapping for each graph node, given the encoded information in the parameters.
+ The details of the parameters are as follows:
+ 1. oplist - a vector that maps each graph node to a unique <operation name, data size> id
+ that was generated by an earlier script (gen_oplist.py)
+ 2. number - the encoded backend assignment, typically a very long integer
+ 3. base_value - for practical purposes, this is equivalent to the number of backends
+"""
+
+
+def generate_vars_for_indx(oplist, number, base_value):
+ ofile = open('/tmp/envvars.sh', 'w')
+ backend_map = {0: "=cpu", 1: "=acl_cl", 2: "=acl_neon"}
+
+ if (base_value == 2):
+ ofile.write("export BACKENDS=\"acl_cl;cpu\"")
+ elif (base_value == 3):
+ ofile.write("export BACKENDS=\"acl_cl;acl_neon;cpu\"")
+ ofile.write("\n")
+ number_arr = int_to_vec(number, base_value, len(oplist))
+ cnt = 0
+ op_backend_map_str = "export OP_BACKEND_MAP=\""
+ for cnt in range(len(oplist)):
+ backend_str = backend_map[int(number_arr[oplist[cnt]])]
+ op_backend_map_str += ''.join([str(cnt), backend_str])
+
+ if (cnt < (len(oplist) - 1)):
+ op_backend_map_str += ";"
+ else:
+ op_backend_map_str += "\""
+ ofile.write(op_backend_map_str)
+ ofile.write("\n")
+ ofile.close()
+
+
+"""
+    Print onert backend mapping for each graph node, given the encoded information in the parameters.
+ The details of the parameters are as follows:
+ 1. oplist - a vector that maps each graph node to a unique <operation name, data size> id
+ that was generated by an earlier script (gen_oplist.py)
+ 2. number - the encoded backend assignment, typically a very long integer
+ 3. base_value - for practical purposes, this is equivalent to the number of backends
+"""
+
+
+def fetch_config_by_indx(oplist, number, base_value):
+ var_str = ""
+ backend_map = {0: "=cpu", 1: "=acl_cl", 2: "=acl_neon"}
+
+ if (base_value == 2):
+ var_str += "BACKENDS=\"acl_cl;cpu\""
+ elif (base_value == 3):
+ var_str += "BACKENDS=\"acl_cl;acl_neon;cpu\""
+ var_str += " "
+ number_arr = int_to_vec(number, base_value, len(oplist))
+ cnt = 0
+ var_str += "OP_BACKEND_MAP=\""
+ op_backend_map_str = ""
+ for cnt in range(len(oplist)):
+ backend_str = backend_map[int(number_arr[oplist[cnt]])]
+ op_backend_map_str += ''.join([str(cnt), backend_str])
+
+ if (cnt < (len(oplist) - 1)):
+ op_backend_map_str += ";"
+ else:
+ op_backend_map_str += "\""
+ var_str += op_backend_map_str
+ return var_str
+
+
+"""
+    Generate onert backend mapping for each graph operation name, given the encoded information in the parameters.
+ The details of the parameters are as follows:
+ 1. oplist - a vector that maps each graph node to a unique operation name.
+ The list is generated by an earlier script (gen_oplist.py)
+ 2. number - the encoded backend assignment, typically a long integer
+ 3. base_value - for practical purposes, this is equivalent to the number of backends
+"""
+
+
+def generate_vars(oplist, number, base_value):
+ ofile = open('/tmp/envvars.sh', 'w')
+ backend_map = {0: "=cpu", 1: "=acl_cl", 2: "=acl_neon"}
+ if (base_value == 2):
+ ofile.write("export BACKENDS=\"acl_cl;cpu\"")
+ elif (base_value == 3):
+ ofile.write("export BACKENDS=\"acl_cl;acl_neon;cpu\"")
+ ofile.write("\n")
+ number_str = int_to_vec(number, base_value, len(oplist))
+
+ cnt = 0
+ for n in number_str:
+ op_backend_map_str = ''.join(
+ ["export OP_BACKEND_", oplist[cnt], backend_map[int(n)]])
+ ofile.write(op_backend_map_str)
+ ofile.write("\n")
+ cnt += 1
+ ofile.close()
+
+
+"""
+    Print onert backend mapping for each graph operation name, given the encoded information in the parameters.
+ The details of the parameters are as follows:
+ 1. oplist - a vector that maps each graph node to a unique operation name.
+ The list is generated by an earlier script (gen_oplist.py)
+ 2. number - the encoded backend assignment, typically a long integer
+ 3. base_value - for practical purposes, this is equivalent to the number of backends
+"""
+
+
+def fetch_config_by_name(oplist, number, base_value):
+ var_str = ""
+ backend_map = {0: "=cpu", 1: "=acl_cl", 2: "=acl_neon"}
+ if (base_value == 2):
+ var_str += "BACKENDS=\"acl_cl;cpu\""
+ elif (base_value == 3):
+ var_str += "BACKENDS=\"acl_cl;acl_neon;cpu\""
+ var_str += " "
+
+ number_str = int_to_vec(number, base_value, len(oplist))
+
+ cnt = 0
+ for n in number_str:
+ var_str += ''.join(["OP_BACKEND_", oplist[cnt], backend_map[int(n)]])
+ var_str += " "
+ cnt += 1
+ return var_str
+
+
+"""
+    Import the operation list, map, and relevant information for profiling. Note: This information should have been
+ dumped under /tmp/oplist.json using the gen_oplist.py script.
+"""
+
+
+def import_configs(mode):
+ if not os.path.isfile('/tmp/oplist.json'):
+ print("No oplist")
+ sys.exit(-1)
+ with open('/tmp/oplist.json', 'r') as ifile:
+ data = json.load(ifile)
+ oplist = data['oplist']
+ if mode == "name":
+ nbits = len(oplist)
+ return oplist, None, None
+ elif mode == "index":
+ opmap = data['opmap']
+ opname_by_indx = data['opname_by_indx']
+ return oplist, opmap, opname_by_indx
+
+ print("mode is incorrect")
+ sys.exit(-1)
+
+
+"""
+ Generic Progress bar display
+"""
+
+
+def progressbar(current_cnt, max_cnt, prefix="", file=sys.stdout):
+ x = int(current_cnt * 100.0 / max_cnt)
+ file.write("%s[%s%s] %i/%i\r" % (prefix, "#" * x, "." * (100 - x), x, 100))
+ file.flush()
--- /dev/null
+#! /usr/bin/python
+import argparse
+import tensorflow as tf
+import sys
+sys.path.append("../estimator")
+import subprocess
+import os
+import json
+from functools import reduce
+from utils import exec_shell
+"""
+    Generates, from a tflite model, a list of unique onert operation names used in the model.
+"""
+
+
+def generate_oplist_by_name(tflite_file):
+ with open("operations_map.json") as ifile:
+ data = json.load(ifile)
+ op_dict = data['op_dict']
+
+ intr = tf.lite.Interpreter(tflite_file)
+ intr.allocate_tensors()
+ tf_opset = set(op['op_name'] for op in intr._get_ops_details())
+ try:
+ onert_ops = set([op_dict[op] for op in tf_opset])
+ except KeyError:
+ print("Invalid mapping, check your tensorflow ops for new/unknown mappings: ",
+ tf_opset)
+ sys.exit(-1)
+ return onert_ops
+
+
+"""
+ Returns the total data size for the model graph node (inputs + outputs)
+ Params:
+ op: operation instance (obtained from _get_ops_details())
+ tsr: tensor instance (obtained from get_tensor_details())
+"""
+
+
+def get_op_data_size(op, tsr):
+ data_size = 0
+ for idx in op['inputs']:
+ if tsr[idx]['shape'].size > 0:
+ data_size += reduce(lambda x, y: x * y,
+ tsr[idx]['shape']) * tsr[idx]['shape'].dtype.itemsize
+
+ for idx in op['outputs']:
+ if tsr[idx]['shape'].size > 0:
+ data_size += reduce(lambda x, y: x * y,
+ tsr[idx]['shape']) * tsr[idx]['shape'].dtype.itemsize
+ return data_size
+
+
+"""
+    Generates, from a tflite model, the following outputs:
+ 1. opmap - a symbol/bit index mapping from every graph operation to a unique <operation name, data size> index identifier. This mapping
+ will be used later when profiling the model at runtime.
+
+ 2. oplist - a list of unique onert operation names used in the model
+
+ 3. opname_by_index - a list of onert operation names, indexed by their topological order in the model
+"""
+
+
+def generate_oplist_by_name_size(tflite_file):
+ intr = tf.lite.Interpreter(tflite_file)
+ intr.allocate_tensors()
+ ops = intr._get_ops_details()
+ tsr = intr.get_tensor_details()
+
+ opset = set()
+ oplist = set()
+ indx = []
+ opname_by_indx = []
+ # Fetch tensorflow operation mapping to onert kernels
+ with open("operations_map.json") as ifile:
+ data = json.load(ifile)
+ op_dict = data['op_dict']
+
+ # Fetch all unique operation names and <operation name, tensordata size> pairs
+ for op in ops:
+ opset.add((op['op_name'], get_op_data_size(op, tsr)))
+ oplist.add(op_dict[op['op_name']])
+ indx.append(op['index'])
+ opname_by_indx = [op_dict[ops[i]['op_name']] for i in indx]
+
+ # Create a 'm' bit/symbol map indexed by <opname, tensordata size> values
+ inv_opset_map = {}
+ i = 0
+ for op in opset:
+ inv_opset_map[op] = i
+ i += 1
+
+ # Map 'n' operation symbol space to 'm' <opname, tensordata size> space
+ op_map = []
+ for op in ops:
+ data_size = get_op_data_size(op, tsr)
+ op_map.append(inv_opset_map[(op['op_name'], data_size)])
+
+ return op_map, oplist, opname_by_indx
+
+
+"""
+Script to generate oplist, given the following details:
+1. Modelfile
+2. target device type
+3. Additional information, such as authentication for file transfer
+
+Info: python gen_oplist.py --help
+"""
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description='''gen_backend: Generates oplist and uploads to target''',
+ epilog="""Success.""")
+ parser.add_argument(
+ '--auth', type=str, default=None, help='authentication: <user@host>')
+ parser.add_argument(
+ '--mode',
+ type=str.lower,
+ choices=["index", "name"],
+ default="name",
+ help='Profile by operation index or name')
+ parser.add_argument('model', type=str, default=None, help='tflite name with path')
+ parser.add_argument(
+ 'target',
+ type=str.lower,
+ choices=['tizen', 'odroid'],
+ default="odroid",
+ help='target name')
+
+ # Parse arguments
+ args = parser.parse_args()
+ modelfile = args.model
+ target = args.target
+ mode = args.mode
+ if target == "odroid":
+ auth_str = args.auth
+ if auth_str is None:
+ print("Need valid authentication")
+ sys.exit(-1)
+
+ # Generate oplist
+ if mode == "name":
+ opset = generate_oplist_by_name(modelfile)
+ print(opset)
+ with open('/tmp/oplist.json', 'w') as opfile:
+ data = {}
+ data['oplist'] = list(opset)
+ json.dump(data, opfile)
+ elif mode == "index":
+ data = {}
+ opmap, oplist, opname_by_indx = generate_oplist_by_name_size(modelfile)
+ data['opmap'] = opmap
+ data['oplist'] = list(oplist)
+ data['opname_by_indx'] = opname_by_indx
+ with open('/tmp/oplist.json', 'w') as opfile:
+ json.dump(data, opfile)
+ # Upload oplist to target
+ if target == "tizen":
+ exec_shell("sdb push /tmp/oplist.json /tmp/oplist.json")
+ elif target == "odroid":
+ print("auth_str = ", auth_str)
+ exec_shell("scp /tmp/oplist.json " + auth_str + ":/tmp/oplist.json")
+ print("done...")
--- /dev/null
+{ "op_dict": {
+ "SUM":"Reduce",
+ "ADD":"BinaryArithmetic",
+ "SUB":"BinaryArithmetic",
+ "DIV":"BinaryArithmetic",
+ "MUL":"BinaryArithmetic",
+ "REDUCE_MAX": "Reduce",
+ "REDUCE_MIN": "Reduce",
+ "CONV_2D": "Conv2D",
+ "PACK":"Pack",
+ "SOFTMAX":"Softmax",
+ "CONCATENATION":"Concat",
+ "EXP":"ElementwiseUnary",
+ "RESHAPE":"Reshape",
+ "SPLIT_V":"SplitV",
+ "ARG_MAX": "ArgMax",
+ "BATCH_TO_SPACE_ND":"BatchToSpaceND",
+ "DEPTHWISE_CONV_2D":"DepthwiseConv2D",
+ "LOGISTIC":"ElementwiseActivation",
+ "MEAN":"Reduce",
+ "RELU6":"ElementwiseActivation",
+ "RELU":"ElementwiseActivation",
+ "RESIZE_BILINEAR":"ResizeBilinear",
+ "REVERSE_V2":"Reverse",
+ "SPACE_TO_BATCH_ND":"SpaceToBatchND",
+ "AVERAGE_POOL_2D": "Pool2D",
+ "MAX_POOL_2D": "Pool2D",
+ "GATHER": "Gather",
+ "CAST": "ElementwiseUnary",
+ "FULLY_CONNECTED": "FullyConnected",
+ "PAD": "Pad",
+ "SLICE" : "Slice",
+ "STRIDED_SLICE": "StridedSlice",
+ "TRANSPOSE": "Transpose",
+ "UNPACK": "Unpack"
+}}
--- /dev/null
+# Stab - Static Backend Scheduler
+
+`Stab` is a tool to schedule a backend for each operation using profiled data
+
+An nnpackage with backend configuration will be created at `./tools/stab/nnpkg_sched`
+
+Supported backends: `cpu`, `ruy`, and `xnnpack`
+- Other backends will be supported once `stab` can measure and use the permutation time between backends
+
+## Scheduling Process
+
+1. Upload ONE runtime and nnpackage to remote device
+ - Use `/tmp/ONE` folder on remote device
+1. Profile execution time of each backend on remote device
+1. Get profile result from remote device
+ - Profile result is saved at `./tools/stab/traces` on host
+1. Schedule a backend for each operation to get the fastest inference time
+   - Use the fastest backend for each operation
+1. Generate nnpackage with backend configuration (an example config is sketched below)
+ - Generated at `./tools/stab/nnpkg_sched`
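+
+The chosen backends are written into the nnpackage's `metadata/config.cfg` as runtime
+configuration variables. The snippet below is only a sketch (operation indices, the default
+backend, and thread counts depend on the profiled model); the keys are the ones emitted by
+`backend_scheduler.py`:
+
+```
+OP_BACKEND_MAP=3=ruy;7=xnnpack;
+BACKENDS=cpu;ruy;xnnpack
+RUY_THREADS=4
+XNNPACK_THREADS=4
+```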
+
+## Prerequisite
+
+- Install Python>=3. Tested on Python 3.6.9 and 3.7.5
+- Register SSH keys to use ssh commands without entering a password
+ ```bash
+ ssh-keygen -t rsa
+ ssh-copy-id -i ~/.ssh/id_rsa.pub remote_user@remote_ip
+ ```
+
+## Usage
+
+```
+Usage: python3 ./tools/stab/stab.py --nnpackage nnpackage_dir --ip <IP>
+Runs the nnpackage on a remote device and creates an nnpackage with scheduled backends
+
+required arguments:
+ --nnpackage NNPACKAGE
+ nnpackage folder to profile
+ --ip IP IP address of remote client
+
+optional arguments:
+ -h, --help show this help message and exit
+ -n NUM_THREADS, --num_threads NUM_THREADS
+ Number of threads used by one runtime
+ -u USER, --user USER User of remote client
+ -v, --verbose Print verbose message
+ --no-profile Disable profiling
+
+Examples:
+ python3 ./tools/stab/stab.py --nnpackage ../nnpkg_tst/inception --ip 1.1.1.1 => Profile on remote device 1.1.1.1 with current user
+  python3 ./tools/stab/stab.py --nnpackage ../nnpkg_tst/inception --ip 1.1.1.1 -n 4 => Profile on remote device 1.1.1.1 using 4 threads for ONE runtime
+ python3 ./tools/stab/stab.py --nnpackage ../nnpkg_tst/inception --ip 1.1.1.1 --user odroid => Profile on remote device 1.1.1.1 with user odroid
+```
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from op_list_parser import OpListParser
+from remote import RemoteSSH
+
+
+class BackendProfiler():
+ """
+    Run the ONE runtime on a remote device to create a TRACE file containing per-operation execution times
+
+ TODO : Support Android device profiling
+ """
+
+ def __init__(self, user, ip, nnpackage_dir, num_threads):
+ self.remote_ssh = RemoteSSH(user, ip, nnpackage_dir, num_threads)
+ self.backend_op_list = OpListParser().parse()
+ self.backend_list = ["cpu"]
+        self.backend_list.extend(self.backend_op_list)
+
+ def sync(self):
+ logging.info("Upload ONE runtime and nnpackage to remote device")
+ self.remote_ssh.sync_binary()
+
+ def profile(self):
+ for backend in self.backend_list:
+ logging.info(f"Profiling {backend} backend")
+ self.remote_ssh.profile_backend(backend, self.backend_op_list)
+ self.remote_ssh.sync_trace(backend)
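+
+
+# Typical driver flow (as wired up in stab.py):
+#   profiler = BackendProfiler(user, ip, nnpackage_dir, num_threads)
+#   profiler.sync()     # upload the ONE runtime and the nnpackage
+#   profiler.profile()  # profile each backend and pull its trace file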
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json, logging
+from pathlib import Path
+from op_list_parser import OpListParser
+from nnpkg_helper import NnpkgHelper
+
+
+class BackendScheduler:
+ """
+    Read profiled data and select a proper backend for each operation
+    The scheduled nnpackage is saved at ./tools/stab/nnpkg_sched
+
+ TODO : Use permutation time for better scheduling
+ """
+
+ def __init__(self, nnpkg_dir, num_threads):
+ self.nnpkg_dir = Path(nnpkg_dir).resolve()
+ self.num_threads = num_threads
+ self.root_path = Path(__file__).parents[2]
+ self.nnpkg_helper = NnpkgHelper()
+
+ def read_traces(self, backend_list):
+ op_time = {}
+ inference_time = {}
+ for backend in backend_list:
+ try:
+ # Trace file is located at ./tools/stab/traces
+ trace_path = Path(
+ __file__
+ ).parent / 'traces' / f"{self.nnpkg_dir.name}_{backend}_{self.num_threads}"
+ logging.debug(f"Trace path : {trace_path}")
+ with open(trace_path) as f:
+ data = json.load(f)
+ execution_data = data['Execution_Data']
+ for entry in execution_data:
+ if entry == "memory":
+ continue
+ elif entry == "runtime":
+ inference_time[backend] = execution_data['runtime']['Graph'][
+ 'Avg_Time']
+ continue
+ op_backend = entry
+ backend_data = execution_data[op_backend]
+ for op in backend_data:
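+                            # Assumption: the trace key's third whitespace-separated token is
+                            # '$<op index>' and its last token is the operation type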
+ op_index = int(op.split(' ')[2][1:])
+ op_type = op.split(' ')[-1]
+ time = int(backend_data[op]["Avg_Time"])
+ if op_index not in op_time.keys():
+ op_time[op_index] = {op_backend: time}
+ op_time[op_index].update({"type": op_type})
+ else:
+ op_time[op_index].update({op_backend: time})
+ except IOError as e:
+ logging.warning(e)
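+        # op_time:        {op_index: {"type": op_type, <backend>: avg_time, ...}}
+        # inference_time: {<backend>: whole-graph Avg_Time from the trace}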
+ return op_time, inference_time
+
+ def schedule(self):
+ backend_op_list = OpListParser().parse()
+ backend_list = ["cpu"]
+        backend_list.extend(backend_op_list)
+
+ op_time, backend_infer_time = self.read_traces(backend_list)
+
+ backend_mapping = {}
+
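+        # Operations supported by at least one non-cpu backend are scheduling
+        # candidates; everything else is assumed to stay on cpu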
+ target_ops = set()
+ for _, v in backend_op_list.items():
+ target_ops.update(v)
+
+ # Find fastest backend for each operation
+ for op_index, value in sorted(op_time.items()):
+ op_type = value['type']
+ if op_type not in target_ops:
+ continue
+
+ logging.debug(f"----- Operation {op_index} -----")
+ op_infer_time = 0
+ for backend in backend_list:
+ if backend not in value:
+ continue
+ backend_time = value[backend]
+
+ logging.debug(f"{backend}[{backend_time}]")
+ if op_infer_time == 0 or backend_time < op_infer_time:
+ op_infer_time = backend_time
+ backend_mapping[op_index] = backend
+
+        # Pick the backend with the fastest whole-graph inference time as the default
+ default_backend = min(backend_infer_time, key=backend_infer_time.get)
+
+ # Create OP_BACKEND_MAP string
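+        # e.g. "3=ruy;7=xnnpack;" (operation indices here are illustrative)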
+ backend_conf = ""
+ for op_index, backend in sorted(backend_mapping.items()):
+ if backend != default_backend:
+ backend_conf += "{}={};".format(op_index, backend)
+
+        # Estimate whole-model inference time per backend and for the schedule
+ logging.info("-------- Expected inference time ---------")
+ inference_time = {}
+ for backend in backend_list:
+ inference_time[backend] = 0
+ for op_index, value in sorted(op_time.items()):
+ if backend in value:
+ inference_time[backend] += value[backend]
+ else:
+ inference_time[backend] += value["cpu"]
+
+ schedule_time = 0
+ for op_index, value in sorted(op_time.items()):
+ op_type = value['type']
+ if op_type not in target_ops:
+ schedule_time += value["cpu"]
+ continue
+ else:
+ op_backend = backend_mapping[op_index]
+ schedule_time += value[op_backend]
+ if (default_backend != op_backend):
+ logging.debug("[{}] {} -> {} : {:.2f} ms decrease".format(
+ op_index, default_backend, op_backend,
+ (value[default_backend] - value[op_backend]) / 1000))
+
+ for backend in backend_list:
+ logging.info(f"{backend} backend : {inference_time[backend]/1000:.2f} ms")
+ logging.info(f"Backend scheduling : {schedule_time / 1000:.2f} ms")
+
+ logging.info("-------- Backend Scheduling --------")
+ cmd = []
+ cmd += [f"OP_BACKEND_MAP={backend_conf}"]
+ for target_backend, op_list in backend_op_list.items():
+ if default_backend == target_backend:
+ for op in op_list:
+ cmd += [f"OP_BACKEND_{op}={default_backend}"]
+ cmd += [f"BACKENDS={';'.join(backend_list)}"]
+ cmd += [f"RUY_THREADS={self.num_threads}"]
+ cmd += [f"XNNPACK_THREADS={self.num_threads}"]
+ logging.info(' '.join(cmd))
+
+ # Create nnpackage with backend mapping
+ dst_dir = Path(__file__).parent / 'nnpkg_sched' / self.nnpkg_dir.name
+ self.nnpkg_helper.copy(self.nnpkg_dir, dst_dir)
+ self.nnpkg_helper.add_config(dst_dir, cmd)
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json, logging
+from distutils.dir_util import copy_tree
+from pathlib import Path
+
+
+class NnpkgHelper:
+ """
+ Helper class for nnpackage
+ """
+
+ def __init__(self):
+ self.config_name = 'config.cfg'
+
+ def copy(self, src, dst):
+ copy_tree(str(src), str(dst))
+
+ def add_config(self, src, configs):
+ manifest_path = Path(src).resolve() / 'metadata' / 'MANIFEST'
+ config_path = Path(src).resolve() / 'metadata' / self.config_name
+
+ try:
+ # Read MANIFEST file
+ with open(manifest_path, 'r') as manifest_file:
+ data = json.load(manifest_file)
+
+ # Add configs to MANIFEST file
+ with open(manifest_path, 'w') as manifest_file:
+ data['configs'] = [self.config_name]
+ json.dump(data, manifest_file, indent=2)
+
+ # Write config.cfg file
+ with open(config_path, 'w') as config_file:
+ config_file.write('\n'.join(configs))
+
+ logging.info(f"Scheduled nnpackage is saved at {src}")
+
+        except IOError as e:
+            logging.warning(e)
+        except Exception as e:
+            logging.warning(f"Failed to add config: {e}")
--- /dev/null
+ruy:Conv2D,FullyConnected
+xnnpack:Conv2D,DepthwiseConv2D,FullyConnected
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pathlib import Path
+
+
+class OpListParser():
+ """
+ Read op_list.txt to create supported operation list for each backend
+
+    TODO : Read the supported tensor type for each operation (FP32 or INT8)
+ """
+
+ def __init__(self):
+ self.file_name = "op_list.txt"
+ self.op_list_file = Path(__file__).parent / self.file_name
+
+ def parse(self):
+ backend_op_list = {}
+ with open(self.op_list_file, 'r') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.rstrip()
+ backend, _, op_list_str = line.partition(':')
+ op_list = op_list_str.split(',')
+ backend_op_list[backend] = op_list
+ return backend_op_list
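+
+
+# With the op_list.txt shipped next to this script, parse() returns:
+#   {'ruy': ['Conv2D', 'FullyConnected'],
+#    'xnnpack': ['Conv2D', 'DepthwiseConv2D', 'FullyConnected']}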
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import subprocess, logging
+from pathlib import Path
+
+
+class RemoteSSH():
+ """
+    Execute commands on a remote device using SSH
+
+    TODO : Use an SSH library instead of direct ssh calls
+ """
+
+ def __init__(self, user, ip, nnpkg_dir, num_threads):
+ self.base_dir = Path('/tmp/ONE')
+ self.trace_dir = 'traces'
+        self.host = f"{user}@{ip}" if user is not None else ip
+ self.nnpkg_dir = Path(nnpkg_dir).resolve()
+ self.nnpkg_name = self.nnpkg_dir.name
+ self.root_path = Path(__file__).resolve().parents[2]
+ self.num_threads = num_threads
+
+ def sync_binary(self):
+ bin_dir = self.root_path / 'Product/armv7l-linux.release/out'
+        if not bin_dir.is_dir():
+            logging.warning(f"Build dir [{bin_dir}] does not exist")
+            exit()
+        elif not self.nnpkg_dir.is_dir():
+            logging.warning(f"nnpackage dir [{self.nnpkg_dir}] does not exist")
+            exit()
+ else:
+ # Create temporary folder
+ subprocess.call(
+ ["ssh", f"{self.host}", "mkdir", "-p", self.base_dir / self.trace_dir])
+            # Sync ONE runtime
+ subprocess.call([
+ "rsync", "-az", "--exclude", "test-suite.tar.gz", bin_dir,
+ self.remote(self.base_dir)
+ ])
+ # Sync target nnpackage
+ subprocess.call(["rsync", "-az", self.nnpkg_dir, self.remote(self.base_dir)])
+
+ def sync_trace(self, backend):
+ remote_trace_path = self.remote_trace_path(backend)
+ local_trace_path = self.local_trace_path(backend)
+ local_trace_path.parent.mkdir(parents=True, exist_ok=True)
+ logging.debug(f"Remote trace path : {self.remote(remote_trace_path)}")
+ logging.debug(f"Local trace path : {local_trace_path}")
+ # Sync trace file
+ subprocess.call(
+ ["rsync", "-az",
+ self.remote(remote_trace_path), local_trace_path])
+
+ def profile_backend(self, backend, backend_op_list):
+ nnpkg_run_path = self.base_dir / 'out/bin/nnpackage_run'
+ nnpkg_path = self.base_dir / self.nnpkg_dir.name
+
+ cmd = ["ssh", f"{self.host}"]
+ cmd += [f"TRACE_FILEPATH={self.remote_trace_path(backend)}"]
+ for target_backend, op_list in backend_op_list.items():
+ if backend == target_backend:
+ for op in op_list:
+ cmd += [f"OP_BACKEND_{op}={backend}"]
+ cmd += [f"XNNPACK_THREADS={self.num_threads}"]
+ cmd += [f"RUY_THREADS={self.num_threads}"]
+ cmd += [f"BACKENDS=\'{';'.join(['cpu', backend])}\'"]
+ cmd += [f"{nnpkg_run_path}"]
+ cmd += [f"--nnpackage"]
+ cmd += [f"{nnpkg_path}"]
+ cmd += [f"-w5 -r50"]
+ logging.debug(f"SSH command : {' '.join(cmd)}")
+ subprocess.call(cmd)
+
+    def base_path(self):
+        pass
+
+ def remote(self, path):
+ return f"{self.host}:{path}"
+
+ # TODO Create class for path generation
+ def trace_name(self, backend):
+ return f"{self.nnpkg_name}_{backend}_{self.num_threads}"
+
+ def remote_trace_path(self, backend):
+ return self.base_dir / self.trace_dir / self.trace_name(backend)
+
+ def local_trace_path(self, backend):
+ return Path(__file__).parent / self.trace_dir / self.trace_name(backend)
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse, logging, sys
+from backend_profiler import BackendProfiler
+from backend_scheduler import BackendScheduler
+
+
+def main(args):
+ if args.profile:
+ backend_profiler = BackendProfiler(args.user, args.ip, args.nnpackage,
+ args.num_threads)
+ backend_profiler.sync()
+ backend_profiler.profile()
+ backend_scheduler = BackendScheduler(args.nnpackage, args.num_threads)
+ backend_scheduler.schedule()
+
+
+if __name__ == "__main__":
+ arg_parser = argparse.ArgumentParser(add_help=False)
+ required = arg_parser.add_argument_group('required arguments')
+ optional = arg_parser.add_argument_group('optional arguments')
+
+ # Add back help
+ optional.add_argument(
+ '-h',
+ '--help',
+ action='help',
+ default=argparse.SUPPRESS,
+ help='show this help message and exit')
+ required.add_argument(
+ "--nnpackage", type=str, required=True, help="nnpackage folder to profile")
+ required.add_argument(
+ "--ip", type=str, required=True, help="IP address of remote client")
+ optional.add_argument(
+ "-n",
+ "--num_threads",
+ type=int,
+ default=1,
+ help="Number of threads used by one runtime")
+ optional.add_argument("-u", "--user", type=str, help="User of remote client")
+ optional.add_argument(
+ "-v",
+ "--verbose",
+ action='store_const',
+ dest="verbose_level",
+ default=logging.INFO,
+ const=logging.DEBUG,
+ help="Print verbose message")
+ optional.add_argument(
+ "--no-profile", dest='profile', action='store_false', help="Disable profiling")
+ optional.set_defaults(profile=True)
+ args = arg_parser.parse_args()
+
+ logging.basicConfig(
+ stream=sys.stdout,
+ level=args.verbose_level,
+ format="[%(levelname).5s] %(message)s")
+
+ main(args)
tflite.QuantizationParameters.QuantizationParametersAddZeroPoint(
new_builder, new_zeropoint)
+ quantized_dimension = selected_quantization.QuantizedDimension()
+ if quantized_dimension != 0:
+ tflite.QuantizationParameters.QuantizationParametersAddQuantizedDimension(
+ new_builder, quantized_dimension)
+
return tflite.QuantizationParameters.QuantizationParametersEnd(new_builder)